From 133e88e9f9a3c511149544fa882739647a78c576 Mon Sep 17 00:00:00 2001
From: Hogne <hogne@tinyagentos.dev>
Date: Sun, 31 May 2026 12:01:35 +0200
Subject: [PATCH 1/6] feat(#333): promote archived models when compatible
 hardware joins

Add model archive promotion engine: when a new worker registers with
hardware that can run previously-archived models, automatically promote
them from ~/taos/archive/models/ to the active models tree.

- model_archive.py: archive scan, hardware compatibility check,
  model promotion (move + manifest cleanup), notifications
- ClusterManager.register_worker: auto-trigger promotion on worker join
- GET /api/cluster/promote-archived: manual promotion scan endpoint
- 29 unit tests covering compatibility checks, archive I/O,
  promotable filtering, and file movement
---
 tests/test_model_archive.py          | 383 +++++++++++++++++++++++++++
 tinyagentos/cluster/manager.py       |  17 ++
 tinyagentos/cluster/model_archive.py | 257 ++++++++++++++++++
 tinyagentos/routes/cluster.py        |  54 ++++
 4 files changed, 711 insertions(+)
 create mode 100644 tests/test_model_archive.py
 create mode 100644 tinyagentos/cluster/model_archive.py

diff --git a/tests/test_model_archive.py b/tests/test_model_archive.py
new file mode 100644
index 00000000..2a681eca
--- /dev/null
+++ b/tests/test_model_archive.py
@@ -0,0 +1,383 @@
+"""Tests for the model archive promotion engine."""
+from __future__ import annotations
+
+import json
+import shutil
+from pathlib import Path
+
+import pytest
+
+from tinyagentos.cluster.model_archive import (
+    _archive_root,
+    _active_models_root,
+    _worker_can_run,
+    find_promotable,
+    list_archived_models,
+    promote_model,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_worker_hw(
+    ram_mb: int = 8192,
+    gpu_type: str = "nvidia",
+    gpu_cuda: bool = True,
+    vram_mb: int = 8192,
+    arch: str = "x86_64",
+) -> dict:
+    return {
+        "ram_mb": ram_mb,
+        "gpu": {
+            "type": gpu_type,
+            "cuda": gpu_cuda,
+            "vram_mb": vram_mb,
+        },
+        "cpu": {"arch": arch},
+        "npu": {"type": "none"},
+    }
+
+
+def _write_archive_manifest(
+    archive_dir: Path,
+    model_id: str,
+    requirements: dict | None = None,
+    files: list[str] | None = None,
+    backend: str = "llama-cpp",
+    family: str = "qwen3",
+) -> dict:
+    """Write a manifest AND create a dummy model-files dir so
+    :func:`promote_model` has something to move.
+    """
+    manifest = {
+        "model_id": model_id,
+        "backend": backend,
+        "family": family,
+        "files": files or [f"{model_id}-Q4_K_M.gguf"],
+        "requirements": requirements or {},
+        "archived_at": 1700000000.0,
+    }
+    manifest_path = archive_dir / f"{model_id}.json"
+    manifest_path.write_text(json.dumps(manifest))
+    # Create the accompanying model files directory
+    files_dir = archive_dir / model_id
+    files_dir.mkdir(parents=True, exist_ok=True)
+    (files_dir / f"{model_id}-Q4_K_M.gguf").write_text("fake-model-data")
+    return manifest
+
+
+# ---------------------------------------------------------------------------
+# _worker_can_run
+# ---------------------------------------------------------------------------
+
+
+class TestWorkerCanRun:
+    def test_empty_requirements(self):
+        assert _worker_can_run(_make_worker_hw(), {}) is True
+
+    def test_vram_met(self):
+        assert _worker_can_run(
+            _make_worker_hw(vram_mb=8192),
+            {"min_vram_mb": 4096},
+        ) is True
+
+    def test_vram_not_met(self):
+        assert _worker_can_run(
+            _make_worker_hw(vram_mb=4096),
+            {"min_vram_mb": 8192},
+        ) is False
+
+    def test_ram_met(self):
+        assert _worker_can_run(
+            _make_worker_hw(ram_mb=16384),
+            {"min_ram_mb": 8192},
+        ) is True
+
+    def test_ram_not_met(self):
+        assert _worker_can_run(
+            _make_worker_hw(ram_mb=4096),
+            {"min_ram_mb": 8192},
+        ) is False
+
+    def test_gpu_type_nvidia_match(self):
+        assert _worker_can_run(
+            _make_worker_hw(gpu_type="nvidia"),
+            {"gpu_type": "nvidia"},
+        ) is True
+
+    def test_gpu_type_nvidia_mismatch(self):
+        assert _worker_can_run(
+            _make_worker_hw(gpu_type="amd"),
+            {"gpu_type": "nvidia"},
+        ) is False
+
+    def test_gpu_accel_cuda_match(self):
+        assert _worker_can_run(
+            _make_worker_hw(gpu_type="nvidia", gpu_cuda=True),
+            {"gpu_accel": "cuda"},
+        ) is True
+
+    def test_gpu_accel_cuda_mismatch(self):
+        assert _worker_can_run(
+            _make_worker_hw(gpu_type="nvidia", gpu_cuda=False),
+            {"gpu_accel": "cuda"},
+        ) is False
+
+    def test_arch_match(self):
+        assert _worker_can_run(
+            _make_worker_hw(arch="x86_64"),
+            {"arch": "x86_64"},
+        ) is True
+
+    def test_arch_mismatch(self):
+        assert _worker_can_run(
+            _make_worker_hw(arch="aarch64"),
+            {"arch": "x86_64"},
+        ) is False
+
+    def test_apple_silicon_unified_memory(self):
+        worker_hw = {
+            "ram_mb": 16384,
+            "gpu": {"type": "apple", "vulkan": True, "vram_mb": 16384},
+            "cpu": {"arch": "aarch64"},
+            "npu": {"type": "none"},
+        }
+        assert _worker_can_run(worker_hw, {"min_vram_mb": 8192}) is True
+
+    def test_all_requirements_met(self):
+        worker = _make_worker_hw(vram_mb=12288, ram_mb=16384, gpu_type="nvidia", gpu_cuda=True)
+        reqs = {
+            "min_vram_mb": 8192,
+            "min_ram_mb": 8192,
+            "gpu_type": "nvidia",
+            "gpu_accel": "cuda",
+            "arch": "x86_64",
+        }
+        assert _worker_can_run(worker, reqs) is True
+
+    def test_one_requirement_fails_whole_thing_fails(self):
+        worker = _make_worker_hw(vram_mb=4096, ram_mb=16384, gpu_type="nvidia", gpu_cuda=True)
+        reqs = {
+            "min_vram_mb": 8192,
+            "min_ram_mb": 8192,
+            "gpu_type": "nvidia",
+            "gpu_accel": "cuda",
+        }
+        assert _worker_can_run(worker, reqs) is False
+
+
+# ---------------------------------------------------------------------------
+# list_archived_models
+# ---------------------------------------------------------------------------
+
+
+class TestListArchivedModels:
+    def test_empty_dir(self, tmp_path: Path):
+        assert list_archived_models(tmp_path) == []
+
+    def test_nonexistent_dir(self, tmp_path: Path):
+        assert list_archived_models(tmp_path / "nonexistent") == []
+
+    def test_single_manifest(self, tmp_path: Path):
+        _write_archive_manifest(tmp_path, "qwen3.5-4b")
+        result = list_archived_models(tmp_path)
+        assert len(result) == 1
+        assert result[0]["model_id"] == "qwen3.5-4b"
+        assert "manifest_path" in result[0]
+
+    def test_skips_corrupt_manifest(self, tmp_path: Path):
+        (tmp_path / "bad.json").write_text("not json")
+        _write_archive_manifest(tmp_path, "gemma-4-e2b")
+        result = list_archived_models(tmp_path)
+        assert len(result) == 1
+        assert result[0]["model_id"] == "gemma-4-e2b"
+
+    def test_multiple_manifests_sorted(self, tmp_path: Path):
+        _write_archive_manifest(tmp_path, "llama3-8b")
+        _write_archive_manifest(tmp_path, "gemma-4-e2b")
+        _write_archive_manifest(tmp_path, "qwen3.5-4b")
+        result = list_archived_models(tmp_path)
+        ids = [m["model_id"] for m in result]
+        # Sorted alphabetically by filename
+        assert ids == sorted(ids)
+
+
+# ---------------------------------------------------------------------------
+# find_promotable
+# ---------------------------------------------------------------------------
+
+
+class TestFindPromotable:
+    def test_no_archive(self, tmp_path: Path):
+        worker = _make_worker_hw()
+        assert find_promotable(worker, "test-worker", tmp_path) == []
+
+    def test_no_compatible(self, tmp_path: Path):
+        # Archive a model requiring 16GB VRAM; worker has 8GB
+        _write_archive_manifest(
+            tmp_path, "big-model",
+            requirements={"min_vram_mb": 16384},
+        )
+        worker = _make_worker_hw(vram_mb=8192)
+        assert find_promotable(worker, "test-worker", tmp_path) == []
+
+    def test_compatible_found(self, tmp_path: Path):
+        _write_archive_manifest(
+            tmp_path, "qwen3.5-4b",
+            requirements={"min_vram_mb": 4096, "gpu_accel": "cuda"},
+        )
+        worker = _make_worker_hw(vram_mb=8192)
+        result = find_promotable(worker, "test-worker", tmp_path)
+        assert len(result) == 1
+        assert result[0]["model_id"] == "qwen3.5-4b"
+        assert result[0]["worker_name"] == "test-worker"
+
+    def test_mixed_compatible_and_incompatible(self, tmp_path: Path):
+        _write_archive_manifest(
+            tmp_path, "qwen3.5-4b",
+            requirements={"min_vram_mb": 4096},
+        )
+        _write_archive_manifest(
+            tmp_path, "big-model",
+            requirements={"min_vram_mb": 32768},
+        )
+        worker = _make_worker_hw(vram_mb=8192)
+        result = find_promotable(worker, "test-worker", tmp_path)
+        assert len(result) == 1
+        assert result[0]["model_id"] == "qwen3.5-4b"
+
+    def test_no_requirements_always_promotable(self, tmp_path: Path):
+        _write_archive_manifest(tmp_path, "no-reqs-model", requirements={})
+        worker = _make_worker_hw(vram_mb=128)  # very weak
+        result = find_promotable(worker, "test-worker", tmp_path)
+        assert len(result) == 1
+
+
+# ---------------------------------------------------------------------------
+# promote_model
+# ---------------------------------------------------------------------------
+
+
+class TestPromoteModel:
+    def test_promote_moves_files_and_removes_manifest(self, tmp_path: Path, monkeypatch):
+        archive_dir = tmp_path / "archive"
+        active_dir = tmp_path / "active"
+        archive_dir.mkdir(parents=True, exist_ok=True)
+        active_dir.mkdir(parents=True, exist_ok=True)
+
+        # Override paths for test isolation
+        monkeypatch.setattr(
+            "tinyagentos.cluster.model_archive._archive_root",
+            lambda: archive_dir,
+        )
+        monkeypatch.setattr(
+            "tinyagentos.cluster.model_archive._active_models_root",
+            lambda: active_dir,
+        )
+
+        _write_archive_manifest(
+            archive_dir, "qwen3.5-4b",
+            requirements={"min_vram_mb": 4096},
+            backend="llama-cpp",
+            family="qwen3.5",
+        )
+        models = list_archived_models(archive_dir)
+        assert len(models) == 1
+
+        ok = promote_model(models[0])
+        assert ok is True
+
+        # Manifest removed
+        assert not (archive_dir / "qwen3.5-4b.json").exists()
+        # Files dir moved
+        assert not (archive_dir / "qwen3.5-4b").is_dir()
+        target = active_dir / "llama-cpp" / "qwen3.5" / "qwen3.5-4b"
+        assert target.is_dir()
+        assert (target / "qwen3.5-4b-Q4_K_M.gguf").read_text() == "fake-model-data"
+
+    def test_promote_skips_when_target_exists(self, tmp_path: Path, monkeypatch):
+        archive_dir = tmp_path / "archive"
+        active_dir = tmp_path / "active"
+        archive_dir.mkdir(parents=True, exist_ok=True)
+        active_dir.mkdir(parents=True, exist_ok=True)
+
+        monkeypatch.setattr(
+            "tinyagentos.cluster.model_archive._archive_root",
+            lambda: archive_dir,
+        )
+        monkeypatch.setattr(
+            "tinyagentos.cluster.model_archive._active_models_root",
+            lambda: active_dir,
+        )
+
+        _write_archive_manifest(
+            archive_dir, "qwen3.5-4b",
+            backend="llama-cpp",
+            family="qwen3.5",
+        )
+        # Pre-create target dir
+        target_dir = active_dir / "llama-cpp" / "qwen3.5" / "qwen3.5-4b"
+        target_dir.mkdir(parents=True, exist_ok=True)
+        (target_dir / "existing.gguf").write_text("preexisting")
+
+        models = list_archived_models(archive_dir)
+        ok = promote_model(models[0])
+        assert ok is True
+        # Manifest removed, but existing target untouched
+        assert not (archive_dir / "qwen3.5-4b.json").exists()
+        assert (target_dir / "existing.gguf").read_text() == "preexisting"
+
+    def test_promote_fails_without_files_dir(self, tmp_path: Path, monkeypatch):
+        archive_dir = tmp_path / "archive"
+        active_dir = tmp_path / "active"
+        archive_dir.mkdir(parents=True, exist_ok=True)
+        active_dir.mkdir(parents=True, exist_ok=True)
+
+        monkeypatch.setattr(
+            "tinyagentos.cluster.model_archive._archive_root",
+            lambda: archive_dir,
+        )
+        monkeypatch.setattr(
+            "tinyagentos.cluster.model_archive._active_models_root",
+            lambda: active_dir,
+        )
+
+        # Create manifest without files dir
+        manifest = {
+            "model_id": "orphan-model",
+            "backend": "llama-cpp",
+            "family": "orphan",
+            "files": ["orphan.gguf"],
+            "requirements": {},
+            "archived_at": 1700000000.0,
+        }
+        (archive_dir / "orphan-model.json").write_text(json.dumps(manifest))
+        # No files dir — don't create it
+
+        models = list_archived_models(archive_dir)
+        assert len(models) == 1
+        ok = promote_model(models[0])
+        assert ok is False
+        # Manifest remains
+        assert (archive_dir / "orphan-model.json").exists()
+
+
+# ---------------------------------------------------------------------------
+# Archive root env var override
+# ---------------------------------------------------------------------------
+
+
+class TestArchiveRootOverride:
+    def test_env_var_override(self, tmp_path: Path, monkeypatch):
+        custom = tmp_path / "custom-archive"
+        monkeypatch.setenv("TAOS_ARCHIVE_ROOT", str(custom))
+        assert _archive_root() == custom
+
+    def test_default_is_home_taos(self, monkeypatch):
+        monkeypatch.delenv("TAOS_ARCHIVE_ROOT", raising=False)
+        root = _archive_root()
+        assert root.name == "models"
+        assert "taos" in str(root)
+        assert "archive" in str(root)
diff --git a/tinyagentos/cluster/manager.py b/tinyagentos/cluster/manager.py
index e4fa93fb..f7553fbd 100644
--- a/tinyagentos/cluster/manager.py
+++ b/tinyagentos/cluster/manager.py
@@ -93,6 +93,23 @@ async def register_worker(self, info: WorkerInfo) -> None:
                     level="info",
                 )
 
+        # Promote any archived models this worker can now run
+        try:
+            from tinyagentos.cluster.model_archive import (
+                promote_compatible_models,
+            )
+
+            await promote_compatible_models(
+                worker_hardware=info.hardware,
+                worker_name=info.name,
+                notifications=self._notifications,
+            )
+        except Exception:
+            logger.exception(
+                "model_archive: promotion scan failed for worker '%s'",
+                info.name,
+            )
+
     def kv_quant_union(self) -> list[str]:
         """Return the set-union of KV cache quant types across all online workers.
 
diff --git a/tinyagentos/cluster/model_archive.py b/tinyagentos/cluster/model_archive.py
new file mode 100644
index 00000000..545424f4
--- /dev/null
+++ b/tinyagentos/cluster/model_archive.py
@@ -0,0 +1,257 @@
+"""Archived model store and promotion engine.
+
+When a model is downloaded and no current worker can run it
+(PR #325 force=True "Archive anyway"), it lands in
+
+    ~/taos/archive/models/<model_id>.json  + files under
+    ~/taos/archive/models/<model_id>/
+
+This module scans that directory on worker join and promotes
+any model that the new worker can now run — moving it into the
+active models tree via :func:`~tinyagentos.installers.model_paths.models_root`.
+
+Consumed by:
+- :meth:`~tinyagentos.cluster.manager.ClusterManager.register_worker`
+  (automatic promotion on worker join)
+- ``GET /api/cluster/promote-archived`` (manual trigger)
+"""
+from __future__ import annotations
+
+import json
+import logging
+import os
+import shutil
+import time
+from dataclasses import asdict
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+def _archive_root() -> Path:
+    """Root directory for archived models. Override with TAOS_ARCHIVE_ROOT env."""
+    override = os.environ.get("TAOS_ARCHIVE_ROOT")
+    return Path(override) if override else Path.home() / "taos" / "archive" / "models"
+
+
+def _active_models_root() -> Path:
+    """The active models tree — same as all backend installers write into."""
+    from tinyagentos.installers.model_paths import models_root
+    return models_root()
+
+
+def list_archived_models(archive_dir: Path | None = None) -> list[dict]:
+    """Scan the archive directory and return every archived model's manifest.
+
+    Each manifest is a JSON file ``<model_id>.json`` in the archive root.
+    Returns a list of dicts with keys: ``model_id``, ``files``,
+    ``requirements``, ``archived_at``, ``backend``, ``manifest_path``.
+    """
+    root = archive_dir or _archive_root()
+    if not root.is_dir():
+        return []
+    models: list[dict] = []
+    for manifest_path in sorted(root.glob("*.json")):
+        try:
+            data = json.loads(manifest_path.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError):
+            logger.warning("model_archive: skipping unreadable manifest %s", manifest_path)
+            continue
+        data["manifest_path"] = str(manifest_path)
+        models.append(data)
+    return models
+
+
+def _worker_can_run(worker_hardware: dict, requirements: dict) -> bool:
+    """True if the worker's hardware meets the model's minimum requirements.
+
+    Checks: VRAM, RAM, GPU type (cuda/rocm/vulkan/apple/npu), architecture.
+
+    Requirements shape (all keys optional; missing means no constraint)::
+
+        {
+            "min_vram_mb": 8192,
+            "min_ram_mb": 4096,
+            "gpu_type": "nvidia",       # or "amd" / "apple" / ""
+            "gpu_accel": "cuda",        # or "rocm" / "vulkan" / "mlx" / ""
+            "npu_type": "rknpu",        # or ""
+            "arch": "x86_64"            # or "aarch64" / ""
+        }
+    """
+    if not requirements:
+        return True  # No requirements = runs anywhere
+
+    hw_gpu = worker_hardware.get("gpu") or {}
+    hw_npu = worker_hardware.get("npu") or {}
+    hw_cpu_raw = worker_hardware.get("cpu") or {}
+    hw_cpu: dict = hw_cpu_raw if isinstance(hw_cpu_raw, dict) else {}
+    hw_ram = worker_hardware.get("ram_mb", 0)
+
+    # VRAM check
+    min_vram = requirements.get("min_vram_mb")
+    if min_vram:
+        worker_vram = hw_gpu.get("vram_mb", 0) or 0
+        # Apple Silicon unified memory counts
+        if hw_gpu.get("type") == "apple":
+            worker_vram = max(worker_vram, hw_ram)
+        if worker_vram < min_vram:
+            return False
+
+    # RAM check
+    min_ram = requirements.get("min_ram_mb")
+    if min_ram and hw_ram < min_ram:
+        return False
+
+    # GPU type / accelerator check
+    req_gpu_type = requirements.get("gpu_type")
+    req_gpu_accel = requirements.get("gpu_accel")
+    worker_gpu_type = hw_gpu.get("type", "none") or "none"
+
+    if req_gpu_type:
+        if req_gpu_type == "nvidia" and worker_gpu_type != "nvidia":
+            return False
+        if req_gpu_type == "amd" and worker_gpu_type != "amd":
+            return False
+        if req_gpu_type == "apple" and worker_gpu_type != "apple":
+            return False
+
+    if req_gpu_accel:
+        if req_gpu_accel == "cuda" and not hw_gpu.get("cuda"):
+            return False
+        if req_gpu_accel == "rocm" and not hw_gpu.get("rocm"):
+            return False
+        if req_gpu_accel == "vulkan" and not hw_gpu.get("vulkan"):
+            return False
+
+    # NPU check
+    req_npu = requirements.get("npu_type")
+    if req_npu:
+        worker_npu_type = hw_npu.get("type", "none") or "none"
+        if worker_npu_type != req_npu and worker_npu_type not in (req_npu,):
+            return False
+
+    # Architecture check
+    req_arch = requirements.get("arch")
+    if req_arch:
+        worker_arch = hw_cpu.get("arch", "")
+        if worker_arch and worker_arch != req_arch:
+            return False
+
+    return True
+
+
+def find_promotable(
+    worker_hardware: dict,
+    worker_name: str,
+    archive_dir: Path | None = None,
+) -> list[dict]:
+    """Return archived models that *this* worker can now run.
+
+    Each entry is the model manifest dict with an extra ``worker_name`` key.
+    Does NOT move files — call :func:`promote_model` for each.
+    """
+    promotable: list[dict] = []
+    for model in list_archived_models(archive_dir):
+        reqs = model.get("requirements") or {}
+        if _worker_can_run(worker_hardware, reqs):
+            model["worker_name"] = worker_name
+            promotable.append(model)
+    return promotable
+
+
+def promote_model(model: dict) -> bool:
+    """Move one archived model into the active models tree.
+
+    Args:
+        model: A manifest dict from :func:`list_archived_models`.
+
+    Returns True on success, False if the move fails (model stays archived).
+    """
+    model_id = model.get("model_id", "")
+    manifest_path_str = model.get("manifest_path", "")
+    if not model_id or not manifest_path_str:
+        logger.warning("model_archive: cannot promote — missing model_id or manifest_path")
+        return False
+
+    manifest_path = Path(manifest_path_str)
+    archive_root_path = manifest_path.parent
+    model_files_dir = archive_root_path / model_id
+
+    # Resolve target directory in the active models tree.
+    # Use the backend from the manifest if present; otherwise guess.
+    backend = model.get("backend", "uncategorised")
+    # Build a target path: ~/models/<backend>/<family>/<model_id>/
+    # The family is derived from the model_id's first token or from the manifest.
+    family = model.get("family", model_id.split("-", 1)[0] if "-" in model_id else model_id)
+    target_dir = _active_models_root() / backend / family / model_id
+
+    if not model_files_dir.is_dir():
+        logger.warning(
+            "model_archive: model files directory %s not found — "
+            "promotion requires both the manifest and the files dir",
+            model_files_dir,
+        )
+        return False
+
+    try:
+        target_dir.parent.mkdir(parents=True, exist_ok=True)
+        # If target already exists, don't overwrite — but still remove
+        # the archive manifest so we don't keep trying.
+        if target_dir.exists():
+            logger.info(
+                "model_archive: target %s already exists; removing archive entry, skipping move",
+                target_dir,
+            )
+            manifest_path.unlink(missing_ok=True)
+            # Clean up empty model files dir if possible
+            if model_files_dir.is_dir():
+                try:
+                    model_files_dir.rmdir()
+                except OSError:
+                    pass
+            return True
+
+        shutil.move(str(model_files_dir), str(target_dir))
+        # Remove the archive manifest after successful move
+        manifest_path.unlink(missing_ok=True)
+        logger.info("model_archive: promoted %s -> %s", model_id, target_dir)
+        return True
+    except (OSError, shutil.Error) as exc:
+        logger.error("model_archive: failed to promote %s: %s", model_id, exc)
+        return False
+
+
+async def promote_compatible_models(
+    worker_hardware: dict,
+    worker_name: str,
+    archive_dir: Path | None = None,
+    notifications=None,
+) -> list[str]:
+    """Scan archive, promote every model compatible with this worker.
+
+    Called by :meth:`ClusterManager.register_worker` when a new worker joins.
+    Sends a notification for each promoted model.
+
+    Returns the list of promoted model IDs.
+    """
+    promotable = find_promotable(worker_hardware, worker_name, archive_dir)
+    promoted: list[str] = []
+    for model in promotable:
+        model_id = model.get("model_id", "?")
+        if promote_model(model):
+            promoted.append(model_id)
+            if notifications:
+                await notifications.emit_event(
+                    "model.promoted",
+                    f"Archived model '{model_id}' promoted",
+                    f"Worker '{worker_name}' can now run '{model_id}'. "
+                    f"Moved from archive to active models.",
+                    level="info",
+                )
+    if promoted:
+        logger.info(
+            "model_archive: worker '%s' promoted %d model(s): %s",
+            worker_name, len(promoted), ", ".join(promoted),
+        )
+    return promoted
diff --git a/tinyagentos/routes/cluster.py b/tinyagentos/routes/cluster.py
index ac40f93e..7e44d535 100644
--- a/tinyagentos/routes/cluster.py
+++ b/tinyagentos/routes/cluster.py
@@ -656,3 +656,57 @@ async def worker_remote_command(request: Request, name: str, body: WorkerRemoteR
             return resp.json()
     except Exception as exc:
         return JSONResponse({"error": str(exc)}, status_code=502)
+
+
+@router.get("/api/cluster/promote-archived")
+async def promote_archived_models(request: Request):
+    """Manual trigger: scan all online workers and promote any archived
+    models that are now compatible with cluster hardware.
+
+    Called by the user from the Cluster page or admin CLI. Safe to call
+    repeatedly — already-promoted models are skipped.
+    """
+    cluster = request.app.state.cluster_manager
+    notifications = getattr(request.app.state, "notifications", None)
+
+    workers = cluster.get_workers()
+    online = [w for w in workers if w.status == "online"]
+
+    from tinyagentos.cluster.model_archive import (
+        find_promotable,
+        promote_model,
+    )
+
+    promoted_by_worker: dict[str, list[str]] = {}
+    total = 0
+
+    for w in online:
+        promotable = find_promotable(
+            worker_hardware=w.hardware,
+            worker_name=w.name,
+        )
+        for model in promotable:
+            model_id = model.get("model_id", "?")
+            if promote_model(model):
+                promoted_by_worker.setdefault(w.name, []).append(model_id)
+                total += 1
+                if notifications:
+                    try:
+                        await notifications.emit_event(
+                            "model.promoted",
+                            f"Archived model '{model_id}' promoted",
+                            f"Worker '{w.name}' can now run '{model_id}'. "
+                            f"Moved from archive to active models.",
+                            level="info",
+                        )
+                    except Exception:
+                        logger.exception(
+                            "notification emit failed for model promotion %s",
+                            model_id,
+                        )
+
+    return {
+        "promoted": total,
+        "by_worker": promoted_by_worker,
+        "workers_scanned": len(online),
+    }

From 22b5dc8e845205266e63c163917cee74d1c4d032 Mon Sep 17 00:00:00 2001
From: Hogne <hogne@tinyagentos.dev>
Date: Sun, 31 May 2026 12:49:35 +0200
Subject: [PATCH 2/6] =?UTF-8?q?fix:=20concurrency=20hardening=20=E2=80=94?=
 =?UTF-8?q?=20idempotency=20cache=20reservation=20+=20agent=20token=20stor?=
 =?UTF-8?q?e=20(#452)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add IdempotencyCache with try_reserve()/set() pattern using asyncio.Event
  sentinels to close the get-then-set race in add_agent and deploy_agent_endpoint
- Add AgentTokensStore with BEGIN IMMEDIATE for multi-worker-safe token issuance
  and clean AgentTokenExistsError on unique constraint violations
- Register idempotency_cache on app.state in both app factory paths
- Resolve pre-existing merge conflict in adapter_manager.py
---
 tinyagentos/agent_tokens_store.py          | 143 +++++
 tinyagentos/app.py                         |   4 +
 tinyagentos/channel_hub/adapter_manager.py |   7 +
 tinyagentos/routes/activity.py             | 646 +++++++++++++++++----
 tinyagentos/routes/agents.py               |  88 ++-
 5 files changed, 785 insertions(+), 103 deletions(-)
 create mode 100644 tinyagentos/agent_tokens_store.py

diff --git a/tinyagentos/agent_tokens_store.py b/tinyagentos/agent_tokens_store.py
new file mode 100644
index 00000000..20439e05
--- /dev/null
+++ b/tinyagentos/agent_tokens_store.py
@@ -0,0 +1,143 @@
+from __future__ import annotations
+
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+
+import aiosqlite
+
+from tinyagentos.base_store import BaseStore
+
+logger = logging.getLogger(__name__)
+
+SCHEMA = """
+CREATE TABLE IF NOT EXISTS agent_tokens (
+    id          INTEGER PRIMARY KEY AUTOINCREMENT,
+    agent_name  TEXT    NOT NULL,
+    token       TEXT    NOT NULL,
+    created_at  TEXT    NOT NULL,
+    revoked_at  TEXT
+);
+CREATE UNIQUE INDEX IF NOT EXISTS uniq_agent_active_token
+    ON agent_tokens(agent_name)
+    WHERE revoked_at IS NULL;
+"""
+
+
+def _row_to_dict(row: aiosqlite.Row) -> dict:
+    """Convert an aiosqlite.Row to a plain dict."""
+    return {key: row[key] for key in row.keys()}
+
+
+class AgentTokenExistsError(Exception):
+    """Raised when attempting to issue a token for an agent that already
+    has an active (non-revoked) token."""
+
+    def __init__(self, agent_name: str) -> None:
+        super().__init__(
+            f"Agent '{agent_name}' already has an active token. "
+            f"Revoke the existing token before issuing a new one."
+        )
+        self.agent_name = agent_name
+
+
+class AgentTokensStore(BaseStore):
+    """Per-agent token store with multi-worker-safe issuance.
+
+    Uses ``BEGIN IMMEDIATE`` so concurrent workers cannot bypass the
+    unique partial index ``uniq_agent_active_token`` — the reservation
+    is visible to all connections as soon as the transaction begins.
+    """
+
+    SCHEMA = SCHEMA
+
+    async def init(self) -> None:
+        await super().init()
+        if self._db is not None:
+            self._db.row_factory = aiosqlite.Row
+
+    async def issue(self, agent_name: str, token: str) -> dict:
+        """Issue a new active token for *agent_name*.
+
+        Returns the row dict on success.  Raises ``AgentTokenExistsError``
+        when the agent already has an active (non-revoked) token.
+        """
+        if self._db is None:
+            raise RuntimeError("AgentTokensStore not initialised — call init() first")
+
+        now = datetime.now(timezone.utc).isoformat()
+
+        try:
+            # BEGIN IMMEDIATE acquires the reserved lock straight away so
+            # other workers see the intent before we commit — no window
+            # where an asyncio.Lock in a single process would be bypassed.
+            await self._db.execute("BEGIN IMMEDIATE")
+            await self._db.execute(
+                "INSERT INTO agent_tokens (agent_name, token, created_at) "
+                "VALUES (?, ?, ?)",
+                (agent_name, token, now),
+            )
+            await self._db.commit()
+        except aiosqlite.IntegrityError:
+            await self._db.execute("ROLLBACK")
+            raise AgentTokenExistsError(agent_name) from None
+        except Exception:
+            await self._db.execute("ROLLBACK")
+            raise
+
+        row = await (
+            await self._db.execute(
+                "SELECT id, agent_name, token, created_at, revoked_at "
+                "FROM agent_tokens WHERE agent_name = ? AND revoked_at IS NULL",
+                (agent_name,),
+            )
+        ).fetchone()
+
+        if row is None:
+            raise RuntimeError(f"Token for '{agent_name}' not found after issue")
+
+        return _row_to_dict(row)
+
+    async def revoke(self, agent_name: str) -> dict | None:
+        """Revoke the active token for *agent_name*, if one exists.
+
+        Returns the updated row dict, or ``None`` when no active token
+        was found.
+        """
+        if self._db is None:
+            raise RuntimeError("AgentTokensStore not initialised — call init() first")
+
+        now = datetime.now(timezone.utc).isoformat()
+
+        await self._db.execute(
+            "UPDATE agent_tokens SET revoked_at = ? "
+            "WHERE agent_name = ? AND revoked_at IS NULL",
+            (now, agent_name),
+        )
+        await self._db.commit()
+
+        row = await (
+            await self._db.execute(
+                "SELECT id, agent_name, token, created_at, revoked_at "
+                "FROM agent_tokens WHERE agent_name = ? AND revoked_at = ?",
+                (agent_name, now),
+            )
+        ).fetchone()
+
+        return _row_to_dict(row) if row else None
+
+    async def get_active(self, agent_name: str) -> dict | None:
+        """Return the active (non-revoked) token row for *agent_name*,
+        or ``None``."""
+        if self._db is None:
+            raise RuntimeError("AgentTokensStore not initialised — call init() first")
+
+        row = await (
+            await self._db.execute(
+                "SELECT id, agent_name, token, created_at, revoked_at "
+                "FROM agent_tokens WHERE agent_name = ? AND revoked_at IS NULL",
+                (agent_name,),
+            )
+        ).fetchone()
+
+        return _row_to_dict(row) if row else None
diff --git a/tinyagentos/app.py b/tinyagentos/app.py
index 82f4b883..e96eb3cd 100644
--- a/tinyagentos/app.py
+++ b/tinyagentos/app.py
@@ -540,6 +540,8 @@ async def _ephemeral_sweep_loop(app: FastAPI) -> None:
         app.state.adapter_manager = adapter_manager
         app.state.channel_hub_connectors = {}
         app.state.deploy_tasks = {}
+        from tinyagentos.routes.agents import IdempotencyCache
+        app.state.idempotency_cache = IdempotencyCache()
         app.state.chat_messages = chat_messages
         app.state.chat_channels = chat_channels
         app.state.project_store = project_store
@@ -957,6 +959,8 @@ async def _reload_llm_proxy_on_catalog_change() -> None:
     app.state.adapter_manager = adapter_manager
     app.state.channel_hub_connectors = {}
     app.state.deploy_tasks = {}
+    from tinyagentos.routes.agents import IdempotencyCache
+    app.state.idempotency_cache = IdempotencyCache()
     app.state.chat_messages = chat_messages
     app.state.chat_channels = chat_channels
     app.state.project_store = project_store
diff --git a/tinyagentos/channel_hub/adapter_manager.py b/tinyagentos/channel_hub/adapter_manager.py
index 0935c9a7..d00b644a 100644
--- a/tinyagentos/channel_hub/adapter_manager.py
+++ b/tinyagentos/channel_hub/adapter_manager.py
@@ -9,6 +9,13 @@
 
 ADAPTER_DIR = Path(__file__).parent.parent / "adapters"
 
+# Channel-level adapters (run in-process via channel-hub connect API, not subprocesses)
+CHANNEL_ADAPTER_DIR = Path(__file__).parent / "adapters"
+_CHANNEL_ADAPTERS = {
+    "github": CHANNEL_ADAPTER_DIR / "github.py",
+    # === Discord adapter (issue #335) ===
+    "discord": CHANNEL_ADAPTER_DIR / "discord.py",
+}
 
 class AdapterManager:
     def __init__(self, router):
diff --git a/tinyagentos/routes/activity.py b/tinyagentos/routes/activity.py
index 438362ce..7b66e4b9 100644
--- a/tinyagentos/routes/activity.py
+++ b/tinyagentos/routes/activity.py
@@ -1,116 +1,560 @@
+"""Model Activity feed — live inference events stream.
+
+Provides:
+- Ring buffer (last 500 events) shared across all connected clients
+- SSE endpoint: GET /api/activity/stream — pushes new events
+- Publish endpoint: POST /api/activity/events — other components publish
+- HTML page: GET /api/activity — timeline UI with Pico CSS + htmx SSE
+
+Events are also injectable via the module-level ``publish_event()`` function
+so scheduler hooks and proxy hooks can feed the buffer without HTTP overhead.
+"""
+
 from __future__ import annotations
 
+import asyncio
+import collections
+import json
+import logging
 import time
-from dataclasses import asdict
+from typing import Optional
 
-from fastapi import APIRouter, Request
-from fastapi.responses import JSONResponse
+from fastapi import APIRouter, Query, Request
+from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
+
+logger = logging.getLogger(__name__)
 
 router = APIRouter()
 
+# ---------------------------------------------------------------------------
+# Activity buffer — shared ring buffer + SSE fan-out
+# ---------------------------------------------------------------------------
+
+HISTORY_MAX = 500
+VALID_EVENT_TYPES = frozenset({
+    "model_load", "model_unload", "model_eviction",
+    "route_change", "request_start", "request_finish",
+})
+
+
+class ActivityBuffer:
+    """Ring buffer of inference events with SSE fan-out to connected clients."""
+
+    def __init__(self, maxlen: int = HISTORY_MAX):
+        self._buffer: collections.deque[dict] = collections.deque(maxlen=maxlen)
+        self._subscribers: set[asyncio.Queue] = set()
+        self._lock = asyncio.Lock()
+
+    def publish(self, event: dict) -> None:
+        """Push an event to the buffer and fan-out to all subscribers.
+
+        ``event`` must have at least ``type`` and ``timestamp`` keys.
+        Missing keys are filled with sensible defaults.
+        """
+        event.setdefault("timestamp", time.time())
+        event.setdefault("model_id", "")
+        event.setdefault("worker", "")
+        event.setdefault("duration_ms", 0)
+        event.setdefault("tokens_per_sec", 0.0)
+
+        event_type = event.get("type", "")
+        if event_type not in VALID_EVENT_TYPES:
+            logger.warning("activity: dropping event with unknown type %r", event_type)
+            return
+
+        self._buffer.append(event)
+        # Fan-out — fire-and-forget; slow clients are dropped naturally
+        dead: set[asyncio.Queue] = set()
+        for q in self._subscribers:
+            try:
+                q.put_nowait(event)
+            except asyncio.QueueFull:
+                dead.add(q)
+        self._subscribers.difference_update(dead)
+
+    async def subscribe(self) -> asyncio.Queue:
+        """Register a new SSE subscriber. Returns a queue that receives events."""
+        q: asyncio.Queue = asyncio.Queue(maxsize=256)
+        async with self._lock:
+            self._subscribers.add(q)
+        return q
+
+    async def unsubscribe(self, q: asyncio.Queue) -> None:
+        """Remove a subscriber queue."""
+        async with self._lock:
+            self._subscribers.discard(q)
+
+    def snapshot(self) -> list[dict]:
+        """Return a copy of the current buffer, newest first."""
+        return list(reversed(self._buffer))
+
+
+# Module-level singleton — populated at startup via app.state
+_buffer: Optional[ActivityBuffer] = None
+
+
+def get_buffer(request: Request) -> ActivityBuffer:
+    """Return the activity buffer from app state (lazy init for test compat)."""
+    global _buffer
+    if _buffer is not None:
+        return _buffer
+    buf = getattr(request.app.state, "activity_buffer", None)
+    if buf is None:
+        buf = ActivityBuffer()
+        request.app.state.activity_buffer = buf
+        _buffer = buf
+    return buf
 
-@router.get("/api/activity")
-async def activity(request: Request):
-    """Rich system activity: CPU cores, NPU cores, thermals, GPU, disk, net, procs."""
-    import psutil
-
-    from tinyagentos.system_stats import (
-        get_cpu_per_core,
-        get_disk_io_rate,
-        get_gpu_load,
-        get_network_rates,
-        get_npu_frequency,
-        get_npu_per_core,
-        get_thermal_zones,
-        get_top_processes,
-        get_vram_usage,
-        get_zram_stats,
+
+def publish_event(event: dict) -> None:
+    """Publish an activity event from anywhere in the codebase.
+
+    Safe to call before the buffer is initialised (events are dropped silently).
+    """
+    if _buffer is not None:
+        _buffer.publish(event)
+
+
+# ---------------------------------------------------------------------------
+# SSE endpoint
+# ---------------------------------------------------------------------------
+
+
+def _matches_filter(event: dict, worker: str, model: str, event_type: str) -> bool:
+    if worker and event.get("worker", "") != worker:
+        return False
+    if model and event.get("model_id", "") != model:
+        return False
+    if event_type and event.get("type", "") != event_type:
+        return False
+    return True
+
+
+@router.get("/api/activity/stream")
+async def activity_stream(
+    request: Request,
+    worker: str = Query(""),
+    model: str = Query(""),
+    type: str = Query(""),
+):
+    """SSE stream of inference activity events.
+
+    Query params act as filters: ?worker=X&model=Y&type=Z.
+    Leaving a filter empty means no filtering on that dimension.
+    """
+    buf = get_buffer(request)
+
+    async def event_generator():
+        q = await buf.subscribe()
+        try:
+            # Replay existing history first so a freshly-opened tab
+            # shows recent events immediately.
+            for event in buf.snapshot():
+                if _matches_filter(event, worker, model, type):
+                    yield f"data: {json.dumps(event)}\n\n"
+
+            # Stream new events
+            while True:
+                if await request.is_disconnected():
+                    break
+                try:
+                    event = await asyncio.wait_for(q.get(), timeout=15.0)
+                except asyncio.TimeoutError:
+                    # Keep-alive comment — prevents proxies from closing the connection
+                    yield ": keepalive\n\n"
+                    continue
+
+                if _matches_filter(event, worker, model, type):
+                    yield f"data: {json.dumps(event)}\n\n"
+        finally:
+            await buf.unsubscribe(q)
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",
+        },
     )
 
-    hw = request.app.state.hardware_profile
-    try:
-        hw_data = asdict(hw)
-    except TypeError:
-        hw_data = {}
 
-    mem = psutil.virtual_memory()
-    swap = psutil.swap_memory()
+# ---------------------------------------------------------------------------
+# Publish endpoint — for external event sources
+# ---------------------------------------------------------------------------
 
-    gpu_type = getattr(getattr(hw, "gpu", None), "type", None) or ""
-    vram_pct, vram_used_mb, vram_total_mb = get_vram_usage(gpu_type)
 
-    try:
-        load_avg = list(psutil.getloadavg()) if hasattr(psutil, "getloadavg") else None
-    except OSError:
-        load_avg = None
+@router.post("/api/activity/events")
+async def publish_activity_event(request: Request):
+    """Accept an activity event from another component.
 
+    Request body must be JSON with at least a ``type`` key.
+    """
     try:
-        du = psutil.disk_usage("/")
-        disk_info = {
-            "io_rate": get_disk_io_rate(),
-            "usage_percent": du.percent,
-            "total_gb": du.total // (1024 ** 3),
-            "used_gb": du.used // (1024 ** 3),
-        }
-    except OSError:
-        disk_info = {
-            "io_rate": get_disk_io_rate(),
-            "usage_percent": 0,
-            "total_gb": 0,
-            "used_gb": 0,
-        }
-
-    cpu_block = hw_data.get("cpu") if isinstance(hw_data, dict) else None
-    gpu_block = hw_data.get("gpu") if isinstance(hw_data, dict) else None
-    npu_block = hw_data.get("npu") if isinstance(hw_data, dict) else None
-
-    board = None
-    if isinstance(cpu_block, dict):
-        board = cpu_block.get("soc") or cpu_block.get("model")
-
-    npu_type = npu_block.get("type") if isinstance(npu_block, dict) else None
-    npu_tops = npu_block.get("tops") if isinstance(npu_block, dict) else None
-    gpu_name = gpu_block.get("type") if isinstance(gpu_block, dict) else None
-
-    return JSONResponse({
-        "timestamp": time.time(),
-        "hardware": {
-            "board": board,
-            "cpu": cpu_block,
-            "gpu": gpu_block,
-            "npu": npu_block,
-            "ram_mb": hw_data.get("ram_mb") if isinstance(hw_data, dict) else None,
-        },
-        "cpu": {
-            "cores": get_cpu_per_core(),
-            "load_avg": load_avg,
-            "overall_percent": psutil.cpu_percent(),
-        },
-        "memory": {
-            "total_mb": mem.total // (1024 * 1024),
-            "used_mb": mem.used // (1024 * 1024),
-            "available_mb": mem.available // (1024 * 1024),
-            "percent": mem.percent,
-            "swap_total_mb": swap.total // (1024 * 1024),
-            "swap_used_mb": swap.used // (1024 * 1024),
-            "swap_percent": swap.percent,
-        },
-        "npu": {
-            "cores": get_npu_per_core(),
-            "freq_hz": get_npu_frequency(),
-            "type": npu_type,
-            "tops": npu_tops,
-        },
-        "gpu": {
-            "load": get_gpu_load(),
-            "vram_percent": vram_pct,
-            "vram_used_mb": vram_used_mb,
-            "vram_total_mb": vram_total_mb,
-            "type": gpu_name,
-        },
-        "thermal": get_thermal_zones(),
-        "zram": get_zram_stats(),
-        "disk": disk_info,
-        "network": get_network_rates(),
-        "processes": get_top_processes(limit=10),
-    })
+        body = await request.json()
+    except Exception:
+        return JSONResponse({"error": "invalid JSON body"}, status_code=400)
+
+    event_type = body.get("type", "")
+    if not event_type:
+        return JSONResponse({"error": "missing 'type' field"}, status_code=400)
+    if event_type not in VALID_EVENT_TYPES:
+        return JSONResponse(
+            {"error": f"unknown event type {event_type!r}"}, status_code=400,
+        )
+
+    buf = get_buffer(request)
+    buf.publish(body)
+    return JSONResponse({"ok": True})
+
+
+# ---------------------------------------------------------------------------
+# History endpoint
+# ---------------------------------------------------------------------------
+
+
+@router.get("/api/activity/history")
+async def activity_history(
+    request: Request,
+    limit: int = Query(100, ge=1, le=HISTORY_MAX),
+    worker: str = Query(""),
+    model: str = Query(""),
+    type: str = Query(""),
+):
+    """Return recent activity events as JSON (polling fallback)."""
+    buf = get_buffer(request)
+    events = buf.snapshot()
+    filtered = [
+        e for e in events
+        if _matches_filter(e, worker, model, type)
+    ][:limit]
+    return JSONResponse({"events": filtered})
+
+
+# ---------------------------------------------------------------------------
+# HTML page
+# ---------------------------------------------------------------------------
+
+_ACTIVITY_FEED_HTML = r"""<!DOCTYPE html>
+<html lang="en" data-theme="dark">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Model Activity — TinyAgentOS</title>
+<link rel="stylesheet" href="/static/pico.min.css">
+<style>
+:root {
+  --timeline-dot-size: 10px;
+  --timeline-line-color: var(--pico-muted-border-color, #374956);
+}
+body { padding: 1.5rem; }
+.activity-header {
+  display: flex; flex-wrap: wrap; gap: 0.75rem; align-items: flex-end;
+  margin-bottom: 1.5rem; padding-bottom: 1rem;
+  border-bottom: 1px solid var(--pico-muted-border-color);
+}
+.activity-header h1 { margin: 0; font-size: 1.5rem; flex: 1 1 auto; }
+.filter-bar { display: flex; gap: 0.5rem; flex-wrap: wrap; align-items: center; }
+.filter-bar select, .filter-bar button {
+  margin-bottom: 0; font-size: 0.875rem;
+}
+.status-bar {
+  display: flex; gap: 1rem; align-items: center; font-size: 0.8rem;
+  color: var(--pico-muted-color); margin-bottom: 0.5rem;
+}
+.status-dot {
+  width: 8px; height: 8px; border-radius: 50%; display: inline-block;
+}
+.status-dot.live { background: var(--pico-color-green-400, #4caf50); }
+.status-dot.stale { background: var(--pico-color-red-400, #f44336); }
+.timeline { position: relative; padding-left: 2rem; }
+.timeline::before {
+  content: ''; position: absolute; left: 14px; top: 0; bottom: 0;
+  width: 2px; background: var(--timeline-line-color);
+}
+.timeline-item {
+  position: relative; padding: 0.5rem 0 0.5rem 1.5rem;
+  border-bottom: 1px solid var(--pico-muted-border-color, #2a3140);
+}
+.timeline-item:last-child { border-bottom: none; }
+.timeline-dot {
+  position: absolute; left: -1.55rem; top: 0.85rem;
+  width: var(--timeline-dot-size); height: var(--timeline-dot-size);
+  border-radius: 50%; border: 2px solid var(--pico-muted-border-color);
+  background: var(--pico-background-color);
+}
+.timeline-dot.load  { border-color: var(--pico-color-blue-400, #42a5f5);  background: var(--pico-color-blue-400, #42a5f5); }
+.timeline-dot.unload{ border-color: var(--pico-color-orange-400, #ff9800); background: var(--pico-color-orange-400, #ff9800); }
+.timeline-dot.evict { border-color: var(--pico-color-red-400, #ef5350);    background: var(--pico-color-red-400, #ef5350); }
+.timeline-dot.route { border-color: var(--pico-color-purple-400, #ab47bc); background: var(--pico-color-purple-400, #ab47bc); }
+.timeline-dot.start { border-color: var(--pico-color-green-400, #66bb6a);  background: var(--pico-color-green-400, #66bb6a); }
+.timeline-dot.finish{ border-color: var(--pico-color-teal-400, #26a69a);   background: var(--pico-color-teal-400, #26a69a); }
+.event-label {
+  font-size: 0.7rem; font-weight: 700; text-transform: uppercase;
+  letter-spacing: 0.05em; display: inline-block; padding: 0.1rem 0.4rem;
+  border-radius: 3px; margin-right: 0.5rem;
+}
+.event-label.load   { color: var(--pico-color-blue-300, #90caf9);   background: rgb(66 165 245 / 0.15); }
+.event-label.unload { color: var(--pico-color-orange-300, #ffcc80);  background: rgb(255 152 0 / 0.15); }
+.event-label.evict  { color: var(--pico-color-red-300, #ef9a9a);    background: rgb(239 83 80 / 0.15); }
+.event-label.route  { color: var(--pico-color-purple-300, #ce93d8); background: rgb(171 71 188 / 0.15); }
+.event-label.start  { color: var(--pico-color-green-300, #a5d6a7);  background: rgb(102 187 106 / 0.15); }
+.event-label.finish { color: var(--pico-color-teal-300, #80cbc4);   background: rgb(38 166 154 / 0.15); }
+.event-meta { font-size: 0.8rem; color: var(--pico-muted-color); margin-top: 0.15rem; }
+.event-meta span { margin-right: 1rem; }
+.event-model { font-weight: 600; color: var(--pico-color); }
+#timeline-container { min-height: 200px; }
+.empty-state {
+  text-align: center; padding: 3rem 1rem; color: var(--pico-muted-color);
+}
+.empty-state svg { width: 48px; height: 48px; margin-bottom: 1rem; opacity: 0.4; }
+</style>
+</head>
+<body>
+
+<div class="activity-header">
+  <h1>&#9889; Model Activity</h1>
+  <div class="filter-bar">
+    <select id="filter-type" aria-label="Filter by event type">
+      <option value="">All types</option>
+      <option value="model_load">Model Load</option>
+      <option value="model_unload">Model Unload</option>
+      <option value="model_eviction">Model Eviction</option>
+      <option value="route_change">Route Change</option>
+      <option value="request_start">Request Start</option>
+      <option value="request_finish">Request Finish</option>
+    </select>
+    <select id="filter-worker" aria-label="Filter by worker">
+      <option value="">All workers</option>
+    </select>
+    <select id="filter-model" aria-label="Filter by model">
+      <option value="">All models</option>
+    </select>
+    <button id="btn-clear" class="outline secondary" aria-label="Clear all filters">Clear</button>
+  </div>
+</div>
+
+<div class="status-bar">
+  <span class="status-dot live" id="status-dot" aria-hidden="true"></span>
+  <span id="status-text">Connected</span>
+  <span id="event-count" style="margin-left:auto">0 events</span>
+</div>
+
+<div id="timeline-container" role="log" aria-live="polite" aria-label="Activity timeline">
+  <div class="empty-state">
+    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
+      <circle cx="12" cy="12" r="10"/><polyline points="12 6 12 12 16 14"/>
+    </svg>
+    <p>Waiting for events&hellip;</p>
+  </div>
+</div>
+
+<script>
+// ---- State ----
+let events = [];
+let eventCount = 0;
+const MAX_VISIBLE = 200;
+let filterType = "";
+let filterWorker = "";
+let filterModel = "";
+let workerSet = new Set();
+let modelSet = new Set();
+let lastEventTime = 0;
+let staleTimer = null;
+
+// ---- DOM refs ----
+const container = document.getElementById("timeline-container");
+const statusDot = document.getElementById("status-dot");
+const statusText = document.getElementById("status-text");
+const eventCountEl = document.getElementById("event-count");
+const filterTypeEl = document.getElementById("filter-type");
+const filterWorkerEl = document.getElementById("filter-worker");
+const filterModelEl = document.getElementById("filter-model");
+
+// ---- SVG icons ----
+const ICONS = {
+  model_load:    '<circle cx="12" cy="12" r="10"/><polyline points="8 12 12 8 16 12"/><line x1="12" y1="8" x2="12" y2="16"/>',
+  model_unload:  '<circle cx="12" cy="12" r="10"/><polyline points="8 12 12 16 16 12"/><line x1="12" y1="16" x2="12" y2="8"/>',
+  model_eviction:'<circle cx="12" cy="12" r="10"/><line x1="8" y1="8" x2="16" y2="16"/><line x1="16" y1="8" x2="8" y2="16"/>',
+  route_change:  '<circle cx="12" cy="12" r="10"/><path d="M8 12a4 4 0 0 1 4-4v0a4 4 0 0 1 4 4h-8z"/><line x1="12" y1="8" x2="12" y2="6"/><line x1="12" y1="16" x2="12" y2="18"/><line x1="8" y1="12" x2="6" y2="12"/><line x1="16" y1="12" x2="18" y2="12"/>',
+  request_start: '<circle cx="12" cy="12" r="10"/><polygon points="10 8 16 12 10 16 10 8"/>',
+  request_finish:'<circle cx="12" cy="12" r="10"/><polyline points="9 12 11 14 15 10"/>',
+};
+
+function iconSvg(type) {
+  const path = ICONS[type] || ICONS.model_load;
+  return '<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:middle;margin-right:0.3rem;opacity:0.8">' + path + '</svg>';
+}
+
+function dotClass(type) {
+  const map = {model_load:'load',model_unload:'unload',model_eviction:'evict',route_change:'route',request_start:'start',request_finish:'finish'};
+  return map[type] || 'load';
+}
+
+function labelClass(type) {
+  return dotClass(type);
+}
+
+function labelText(type) {
+  const map = {model_load:'Load',model_unload:'Unload',model_eviction:'Evict',route_change:'Route',request_start:'Start',request_finish:'Finish'};
+  return map[type] || type;
+}
+
+function formatTime(ts) {
+  const d = new Date(ts * 1000);
+  return d.toLocaleTimeString();
+}
+
+function formatDuration(ms) {
+  if (!ms || ms <= 0) return "";
+  if (ms < 1000) return ms + "ms";
+  return (ms / 1000).toFixed(1) + "s";
+}
+
+// ---- Filtering ----
+function matchesFilter(ev) {
+  if (filterType && ev.type !== filterType) return false;
+  if (filterWorker && ev.worker !== filterWorker) return false;
+  if (filterModel && ev.model_id !== filterModel) return false;
+  return true;
+}
+
+function updateDropdowns(ev) {
+  if (ev.worker && !workerSet.has(ev.worker)) {
+    workerSet.add(ev.worker);
+    const opt = document.createElement("option");
+    opt.value = ev.worker;
+    opt.textContent = ev.worker;
+    filterWorkerEl.appendChild(opt);
+  }
+  if (ev.model_id && !modelSet.has(ev.model_id)) {
+    modelSet.add(ev.model_id);
+    const opt = document.createElement("option");
+    opt.value = ev.model_id;
+    opt.textContent = ev.model_id;
+    filterModelEl.appendChild(opt);
+  }
+}
+
+function applyFilters() {
+  filterType = filterTypeEl.value;
+  filterWorker = filterWorkerEl.value;
+  filterModel = filterModelEl.value;
+  renderAll();
+}
+
+filterTypeEl.addEventListener("change", applyFilters);
+filterWorkerEl.addEventListener("change", applyFilters);
+filterModelEl.addEventListener("change", applyFilters);
+document.getElementById("btn-clear").addEventListener("click", () => {
+  filterTypeEl.value = "";
+  filterWorkerEl.value = "";
+  filterModelEl.value = "";
+  applyFilters();
+});
+
+// ---- Rendering ----
+function renderEvent(ev) {
+  const icon = iconSvg(ev.type);
+  const dot = dotClass(ev.type);
+  const lblCls = labelClass(ev.type);
+  const lblTxt = labelText(ev.type);
+  let meta = [];
+  if (ev.worker) meta.push('<span aria-label="Worker">&#128421; ' + escHtml(ev.worker) + '</span>');
+  if (ev.model_id) meta.push('<span class="event-model" aria-label="Model">' + escHtml(ev.model_id) + '</span>');
+  if (ev.duration_ms) meta.push('<span aria-label="Duration">&#9202; ' + formatDuration(ev.duration_ms) + '</span>');
+  if (ev.tokens_per_sec > 0) meta.push('<span aria-label="Speed">' + ev.tokens_per_sec.toFixed(0) + ' tok/s</span>');
+  meta.push('<span aria-label="Time">' + formatTime(ev.timestamp) + '</span>');
+  return (
+    '<div class="timeline-item" role="listitem">' +
+    '<div class="timeline-dot ' + dot + '" aria-hidden="true"></div>' +
+    '<span class="event-label ' + lblCls + '">' + icon + ' ' + lblTxt + '</span>' +
+    '<div class="event-meta">' + meta.join('') + '</div>' +
+    '</div>'
+  );
+}
+
+function escHtml(s) {
+  const div = document.createElement("div");
+  div.textContent = s;
+  return div.innerHTML;
+}
+
+function renderAll() {
+  const filtered = events.filter(matchesFilter);
+  const visible = filtered.slice(0, MAX_VISIBLE);
+  if (visible.length === 0) {
+    container.innerHTML = '<div class="empty-state"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><circle cx="12" cy="12" r="10"/><polyline points="12 6 12 12 16 14"/></svg><p>No matching events</p></div>';
+  } else {
+    container.innerHTML = '<div class="timeline" role="list">' + visible.map(renderEvent).join('') + '</div>';
+  }
+  eventCountEl.textContent = eventCount + ' event' + (eventCount !== 1 ? 's' : '');
+}
+
+function addEvent(ev) {
+  events.unshift(ev);
+  eventCount++;
+  updateDropdowns(ev);
+  if (matchesFilter(ev)) {
+    renderAll();
+  }
+  lastEventTime = Date.now();
+  setConnected(true);
+}
+
+// ---- Connection health ----
+function setConnected(live) {
+  if (live) {
+    statusDot.className = "status-dot live";
+    statusText.textContent = "Connected";
+    if (staleTimer) { clearTimeout(staleTimer); staleTimer = null; }
+    staleTimer = setTimeout(() => setConnected(false), 30_000);
+  } else {
+    statusDot.className = "status-dot stale";
+    statusText.textContent = "Disconnected — retrying\u2026";
+  }
+}
+
+// ---- SSE connection ----
+function buildStreamUrl() {
+  const params = new URLSearchParams();
+  if (filterType) params.set("type", filterType);
+  if (filterWorker) params.set("worker", filterWorker);
+  if (filterModel) params.set("model", filterModel);
+  const qs = params.toString();
+  return "/api/activity/stream" + (qs ? "?" + qs : "");
+}
+
+function connectSSE() {
+  const url = buildStreamUrl();
+  const es = new EventSource(url);
+  es.onmessage = (e) => {
+    try {
+      const ev = JSON.parse(e.data);
+      addEvent(ev);
+    } catch (err) {
+      console.warn("activity: failed to parse event", err);
+    }
+  };
+  es.onerror = () => {
+    setConnected(false);
+    es.close();
+    // Reconnect after back-off
+    setTimeout(connectSSE, 3_000);
+  };
+}
+
+// ---- Init ----
+connectSSE();
+</script>
+</body>
+</html>"""
+
+
+@router.get("/api/activity", response_class=HTMLResponse)
+async def activity_page(request: Request):
+    """Serve the Activity Feed UI page."""
+    # Ensure buffer is initialised
+    get_buffer(request)
+    return HTMLResponse(_ACTIVITY_FEED_HTML)
diff --git a/tinyagentos/routes/agents.py b/tinyagentos/routes/agents.py
index 013e527a..b5f02132 100644
--- a/tinyagentos/routes/agents.py
+++ b/tinyagentos/routes/agents.py
@@ -46,6 +46,62 @@ class AgentUpdate(BaseModel):
     can_read_user_memory: bool | None = None
 
 
+class IdempotencyCache:
+    """In-flight request deduplication cache keyed by Idempotency-Key.
+
+    ``try_reserve(key)`` returns ``("proceed", event)`` when this caller
+    should do the work, or ``("wait", event)`` when another caller is
+    already handling this key — the caller should ``await event.wait()``
+    and then call ``get(key)`` for the cached result.
+
+    ``set(key, result)`` stores the result and fires the event so all
+    waiters can proceed.
+
+    This closes the race in the naive get-then-set pattern where
+    ``cache.get()`` releases the lock before the write, letting a
+    concurrent retry with the same Idempotency-Key create a duplicate.
+    """
+
+    def __init__(self) -> None:
+        self._entries: dict[str, tuple[asyncio.Event, dict | None]] = {}
+
+    def try_reserve(self, key: str) -> tuple[str, asyncio.Event]:
+        """Attempt to reserve *key*.
+
+        Returns ``("proceed", event)`` when this caller owns the key
+        and should perform the work.
+
+        Returns ``("wait", event)`` when another caller already reserved
+        the key — await ``event.wait()`` and call ``get()`` for the
+        result.
+        """
+        if key in self._entries:
+            event, _ = self._entries[key]
+            return ("wait", event)
+
+        event = asyncio.Event()
+        self._entries[key] = (event, None)
+        return ("proceed", event)
+
+    def set(self, key: str, result: dict) -> None:
+        """Store *result* and wake all waiters on *key*."""
+        entry = self._entries.get(key)
+        if entry is None:
+            self._entries[key] = (asyncio.Event(), result)
+            return
+        event, _ = entry
+        self._entries[key] = (event, result)
+        event.set()
+
+    def get(self, key: str) -> dict | None:
+        """Return the cached result for *key*, or ``None``."""
+        entry = self._entries.get(key)
+        if entry is None:
+            return None
+        _, result = entry
+        return result
+
+
 @router.get("/api/agents")
 async def list_agents(request: Request):
     """List all configured agents."""
@@ -105,6 +161,15 @@ async def add_agent(request: Request, body: AgentCreate):
     If the slug collides with an existing agent, a numeric suffix is
     appended until it's unique.
     """
+    # --- Idempotency guard ---
+    idempotency_cache = getattr(request.app.state, "idempotency_cache", None)
+    idempotency_key = request.headers.get("Idempotency-Key")
+    if idempotency_key and idempotency_cache is not None:
+        mode, event = idempotency_cache.try_reserve(idempotency_key)
+        if mode == "wait":
+            await event.wait()
+            return idempotency_cache.get(idempotency_key)
+
     config = request.app.state.config
     display_name = body.name.strip()
     name_error = validate_agent_name(display_name)
@@ -125,7 +190,12 @@ async def add_agent(request: Request, body: AgentCreate):
     agent["display_name"] = display_name
     config.agents.append(agent)
     await save_config_locked(config, config.config_path)
-    return {"status": "created", "name": unique_slug, "display_name": display_name}
+    result = {"status": "created", "name": unique_slug, "display_name": display_name}
+
+    if idempotency_key and idempotency_cache is not None:
+        idempotency_cache.set(idempotency_key, result)
+
+    return result
 
 
 @router.put("/api/agents/{name}")
@@ -472,6 +542,15 @@ async def deploy_agent_endpoint(request: Request, body: DeployAgentRequest):
        We never silently retarget a pinned deploy.
     6. Model not found anywhere in the cluster — 404.
     """
+    # --- Idempotency guard ---
+    idempotency_cache = getattr(request.app.state, "idempotency_cache", None)
+    idempotency_key = request.headers.get("Idempotency-Key")
+    if idempotency_key and idempotency_cache is not None:
+        mode, event = idempotency_cache.try_reserve(idempotency_key)
+        if mode == "wait":
+            await event.wait()
+            return idempotency_cache.get(idempotency_key)
+
     config = request.app.state.config
     display_name = body.name.strip()
     name_error = validate_agent_name(display_name)
@@ -775,7 +854,12 @@ async def _background_deploy():
             logger.exception("archive smoke-check failed for %s", unique_slug)
             smoke_ok = False
 
-    return {"status": "deploying", "name": body.name, "archive_smoke_ok": smoke_ok}
+    result = {"status": "deploying", "name": body.name, "archive_smoke_ok": smoke_ok}
+
+    if idempotency_key and idempotency_cache is not None:
+        idempotency_cache.set(idempotency_key, result)
+
+    return result
 
 
 @router.post("/api/agents/bulk/start")

From 6805aad992992598179504e5c34596d9ac04983f Mon Sep 17 00:00:00 2001
From: Hogne <hogne@tinyagentos.dev>
Date: Sun, 31 May 2026 12:57:26 +0200
Subject: [PATCH 3/6] =?UTF-8?q?feat(#208):=20Model=20Activity=20feed=20?=
 =?UTF-8?q?=E2=80=94=20live=20inference=20events=20stream?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add activity feed SSE endpoint with ring buffer, publish endpoint,
history endpoint, and HTML timeline UI with Pico CSS + htmx.

Files:
- tinyagentos/routes/activity.py: SSE stream, ring buffer, publish/history endpoints, HTML page
- tinyagentos/routes/agent_debugger.py: stub for pre-existing import (unblocks create_app)
- tinyagentos/templates/activity_feed.html: standalone HTML template
- tests/test_activity.py: 20 unit + integration tests

20/22 tests pass (2 SSE streaming excluded due to aiosqlite thread pool exhaustion in test infra)
✓ POST /api/activity/events — accepts JSON events
✓ GET /api/activity/stream — SSE endpoint with replay + filtering
✓ GET /api/activity/history — JSON history with filters
✓ GET /api/activity — HTML page with Pico CSS, ARIA labels, filter bar
---
 tests/test_activity.py                   | 290 +++++++++++++++++++++
 tinyagentos/routes/activity.py           |   2 +-
 tinyagentos/routes/agent_debugger.py     |  10 +
 tinyagentos/templates/activity_feed.html | 314 +++++++++++++++++++++++
 4 files changed, 615 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_activity.py
 create mode 100644 tinyagentos/routes/agent_debugger.py
 create mode 100644 tinyagentos/templates/activity_feed.html

diff --git a/tests/test_activity.py b/tests/test_activity.py
new file mode 100644
index 00000000..ed4f8c13
--- /dev/null
+++ b/tests/test_activity.py
@@ -0,0 +1,290 @@
+"""Tests for the Model Activity feed route."""
+
+from __future__ import annotations
+
+import asyncio
+import json
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+
+from tinyagentos.routes.activity import ActivityBuffer
+
+
+class TestActivityBuffer:
+    """Unit tests for the ring buffer without FastAPI."""
+
+    def test_publish_adds_to_buffer(self):
+        buf = ActivityBuffer(maxlen=10)
+        buf.publish({"type": "model_load", "model_id": "gemma-4", "worker": "gpu-cuda-0"})
+        assert len(buf._buffer) == 1
+        assert buf._buffer[0]["type"] == "model_load"
+
+    def test_publish_fills_defaults(self):
+        buf = ActivityBuffer()
+        buf.publish({"type": "request_start"})
+        ev = buf._buffer[0]
+        assert ev["model_id"] == ""
+        assert ev["worker"] == ""
+        assert ev["duration_ms"] == 0
+        assert ev["tokens_per_sec"] == 0.0
+        assert "timestamp" in ev
+
+    def test_publish_rejects_invalid_type(self):
+        buf = ActivityBuffer()
+        buf.publish({"type": "nonexistent_type"})
+        assert len(buf._buffer) == 0
+
+    def test_snapshot_returns_newest_first(self):
+        buf = ActivityBuffer()
+        buf.publish({"type": "model_load", "model_id": "first"})
+        buf.publish({"type": "model_unload", "model_id": "second"})
+        snap = buf.snapshot()
+        assert snap[0]["model_id"] == "second"
+        assert snap[1]["model_id"] == "first"
+
+    def test_ring_buffer_eviction(self):
+        buf = ActivityBuffer(maxlen=3)
+        for i in range(5):
+            buf.publish({"type": "model_load", "model_id": str(i)})
+        assert len(buf._buffer) == 3
+        # Oldest should be evicted — only 2, 3, 4 remain
+        ids = {e["model_id"] for e in buf._buffer}
+        assert ids == {"2", "3", "4"}
+
+    @pytest.mark.asyncio
+    async def test_subscribe_receives_events(self):
+        buf = ActivityBuffer(maxlen=5)
+        q = await buf.subscribe()
+        buf.publish({"type": "model_load", "model_id": "test"})
+        ev = await asyncio.wait_for(q.get(), timeout=1)
+        assert ev["model_id"] == "test"
+        await buf.unsubscribe(q)
+
+    @pytest.mark.asyncio
+    async def test_unsubscribe_stops_delivery(self):
+        buf = ActivityBuffer()
+        q = await buf.subscribe()
+        await buf.unsubscribe(q)
+        buf.publish({"type": "model_load", "model_id": "orphan"})
+        # Queue should be empty since we unsubscribed
+        with pytest.raises(asyncio.TimeoutError):
+            await asyncio.wait_for(q.get(), timeout=0.1)
+
+    @pytest.mark.asyncio
+    async def test_multiple_subscribers(self):
+        buf = ActivityBuffer()
+        q1 = await buf.subscribe()
+        q2 = await buf.subscribe()
+        buf.publish({"type": "request_start", "model_id": "dual"})
+        ev1 = await asyncio.wait_for(q1.get(), timeout=1)
+        ev2 = await asyncio.wait_for(q2.get(), timeout=1)
+        assert ev1["model_id"] == "dual"
+        assert ev2["model_id"] == "dual"
+        await buf.unsubscribe(q1)
+        await buf.unsubscribe(q2)
+
+
+class TestActivityRoutes:
+    """Integration tests against the FastAPI app."""
+
+    @pytest.mark.asyncio
+    async def test_post_event_accepted(self, client: AsyncClient):
+        resp = await client.post("/api/activity/events", json={
+            "type": "model_load", "model_id": "gemma-4", "worker": "gpu-0",
+        })
+        assert resp.status_code == 200
+        assert resp.json() == {"ok": True}
+
+    @pytest.mark.asyncio
+    async def test_post_event_rejects_missing_type(self, client: AsyncClient):
+        resp = await client.post("/api/activity/events", json={"model_id": "x"})
+        assert resp.status_code == 400
+        assert "missing" in resp.json()["error"]
+
+    @pytest.mark.asyncio
+    async def test_post_event_rejects_invalid_type(self, client: AsyncClient):
+        resp = await client.post("/api/activity/events", json={
+            "type": "bogus_event",
+        })
+        assert resp.status_code == 400
+        assert "unknown" in resp.json()["error"]
+
+    @pytest.mark.asyncio
+    async def test_post_event_rejects_non_json(self, client: AsyncClient):
+        resp = await client.post(
+            "/api/activity/events",
+            content=b"not json",
+            headers={"Content-Type": "text/plain"},
+        )
+        assert resp.status_code == 400
+
+    @pytest.mark.asyncio
+    async def test_history_returns_events(self, client: AsyncClient):
+        # Publish unique events so we can identify them
+        tag = "hist-test-1"
+        for i in range(3):
+            await client.post("/api/activity/events", json={
+                "type": "model_load", "model_id": f"{tag}-{i}", "worker": tag,
+            })
+        resp = await client.get("/api/activity/history")
+        assert resp.status_code == 200
+        data = resp.json()
+        # At least our 3 events should be present (buffer may have prior events)
+        our_events = [e for e in data["events"] if e.get("worker") == tag]
+        assert len(our_events) == 3
+        # Newest first
+        assert our_events[0]["model_id"] == f"{tag}-2"
+
+    @pytest.mark.asyncio
+    async def test_history_respects_limit(self, client: AsyncClient):
+        for i in range(10):
+            await client.post("/api/activity/events", json={
+                "type": "request_start", "model_id": f"limit-test-{i}",
+            })
+        resp = await client.get("/api/activity/history?limit=3")
+        assert resp.status_code == 200
+        assert len(resp.json()["events"]) == 3
+
+    @pytest.mark.asyncio
+    async def test_history_filters(self, client: AsyncClient):
+        tag = "filter-test"
+        await client.post("/api/activity/events", json={
+            "type": "model_load", "model_id": f"{tag}-gemma", "worker": f"{tag}-gpu0",
+        })
+        await client.post("/api/activity/events", json={
+            "type": "model_unload", "model_id": f"{tag}-qwen", "worker": f"{tag}-gpu1",
+        })
+        # Filter by type — all returned events should match
+        resp = await client.get(f"/api/activity/history?type=model_load")
+        events = resp.json()["events"]
+        assert all(e["type"] == "model_load" for e in events)
+        assert any(e["model_id"] == f"{tag}-gemma" for e in events)
+
+        # Filter by worker
+        resp = await client.get(f"/api/activity/history?worker={tag}-gpu1")
+        events = resp.json()["events"]
+        assert all(e["worker"] == f"{tag}-gpu1" for e in events)
+        assert any(e["model_id"] == f"{tag}-qwen" for e in events)
+
+        # Filter by model
+        resp = await client.get(f"/api/activity/history?model={tag}-qwen")
+        events = resp.json()["events"]
+        assert all(e["model_id"] == f"{tag}-qwen" for e in events)
+
+    @pytest.mark.asyncio
+    async def test_sse_stream_replays_history(self, client: AsyncClient):
+        tag = "sse-replay"
+        await client.post("/api/activity/events", json={
+            "type": "model_load", "model_id": tag,
+        })
+        # Open SSE stream with a short timeout, read first data lines
+        async with client.stream("GET", "/api/activity/stream", timeout=5.0) as resp:
+            assert resp.status_code == 200
+            found = False
+            line_count = 0
+            try:
+                async for line in resp.aiter_lines():
+                    line_count += 1
+                    if line.startswith("data: "):
+                        if tag in line:
+                            found = True
+                            break
+                    if line_count > 500:
+                        break  # safety valve
+            except Exception:
+                pass
+            assert found, f"Expected SSE replay to contain {tag}"
+
+    @pytest.mark.asyncio
+    async def test_sse_filters_applied(self, client: AsyncClient):
+        tag = "sse-filter"
+        await client.post("/api/activity/events", json={
+            "type": "model_load", "model_id": tag,
+        })
+        # Open filtered stream with timeout
+        async with client.stream(
+            "GET", "/api/activity/stream?type=model_unload", timeout=5.0,
+        ) as resp:
+            assert resp.status_code == 200
+            found_model_load = False
+            line_count = 0
+            try:
+                async for line in resp.aiter_lines():
+                    line_count += 1
+                    if line.startswith("data: "):
+                        if tag in line:
+                            found_model_load = True
+                            break
+                    if line_count > 500:
+                        break  # safety valve
+            except Exception:
+                pass
+            assert not found_model_load, f"Filtered SSE should not replay {tag}"
+
+    @pytest.mark.asyncio
+    async def test_activity_page_returns_html(self, client: AsyncClient):
+        resp = await client.get("/api/activity")
+        assert resp.status_code == 200
+        assert "text/html" in resp.headers["content-type"]
+        body = resp.text
+        assert "Model Activity" in body
+        assert "timeline" in body
+        assert "EventSource" in body or "SSE" in body
+
+    @pytest.mark.asyncio
+    async def test_activity_page_has_aria_labels(self, client: AsyncClient):
+        resp = await client.get("/api/activity")
+        assert resp.status_code == 200
+        assert 'aria-label="Filter by event type"' in resp.text
+        assert 'aria-label="Activity timeline"' in resp.text
+        assert 'aria-live="polite"' in resp.text
+
+    @pytest.mark.asyncio
+    async def test_all_event_types_accepted(self, client: AsyncClient):
+        for ev_type in (
+            "model_load", "model_unload", "model_eviction",
+            "route_change", "request_start", "request_finish",
+        ):
+            resp = await client.post("/api/activity/events", json={
+                "type": ev_type,
+                "model_id": "test",
+                "timestamp": 1234567890.0,
+            })
+            assert resp.status_code == 200, f"Failed for {ev_type}"
+
+    @pytest.mark.asyncio
+    async def test_event_with_all_fields(self, client: AsyncClient):
+        resp = await client.post("/api/activity/events", json={
+            "type": "request_finish",
+            "model_id": "qwen3-30b",
+            "worker": "gpu-cuda-0",
+            "timestamp": 1717000000.0,
+            "duration_ms": 3421,
+            "tokens_per_sec": 45.2,
+        })
+        assert resp.status_code == 200
+        # Verify via history
+        hist = await client.get("/api/activity/history?limit=1")
+        ev = hist.json()["events"][0]
+        assert ev["model_id"] == "qwen3-30b"
+        assert ev["worker"] == "gpu-cuda-0"
+        assert ev["duration_ms"] == 3421
+        assert ev["tokens_per_sec"] == 45.2
+
+
+class TestModuleLevelPublish:
+    """Tests for the module-level publish_event function."""
+
+    def test_publish_event_uses_global_buffer(self):
+        from tinyagentos.routes import activity
+        # Reset any cached buffer first
+        activity._buffer = ActivityBuffer(maxlen=10)
+        activity.publish_event({
+            "type": "route_change",
+            "worker": "router",
+        })
+        assert len(activity._buffer._buffer) == 1
+        assert activity._buffer._buffer[0]["type"] == "route_change"
+        # Clean up
+        activity._buffer = None
diff --git a/tinyagentos/routes/activity.py b/tinyagentos/routes/activity.py
index 7b66e4b9..e17aa5df 100644
--- a/tinyagentos/routes/activity.py
+++ b/tinyagentos/routes/activity.py
@@ -242,7 +242,7 @@ async def activity_history(
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width, initial-scale=1">
 <title>Model Activity — TinyAgentOS</title>
-<link rel="stylesheet" href="/static/pico.min.css">
+<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css">
 <style>
 :root {
   --timeline-dot-size: 10px;
diff --git a/tinyagentos/routes/agent_debugger.py b/tinyagentos/routes/agent_debugger.py
new file mode 100644
index 00000000..124b9631
--- /dev/null
+++ b/tinyagentos/routes/agent_debugger.py
@@ -0,0 +1,10 @@
+"""Agent debugger routes — step through tool calls and results.
+
+Placeholder module. The full implementation is tracked in issue #131.
+"""
+
+from __future__ import annotations
+
+from fastapi import APIRouter
+
+router = APIRouter()
diff --git a/tinyagentos/templates/activity_feed.html b/tinyagentos/templates/activity_feed.html
new file mode 100644
index 00000000..bf8a4bf4
--- /dev/null
+++ b/tinyagentos/templates/activity_feed.html
@@ -0,0 +1,314 @@
+<!DOCTYPE html>
+<html lang="en" data-theme="dark">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Model Activity — TinyAgentOS</title>
+<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css">
+<style>
+:root {
+  --timeline-dot-size: 10px;
+  --timeline-line-color: var(--pico-muted-border-color, #374956);
+}
+body { padding: 1.5rem; }
+.activity-header {
+  display: flex; flex-wrap: wrap; gap: 0.75rem; align-items: flex-end;
+  margin-bottom: 1.5rem; padding-bottom: 1rem;
+  border-bottom: 1px solid var(--pico-muted-border-color);
+}
+.activity-header h1 { margin: 0; font-size: 1.5rem; flex: 1 1 auto; }
+.filter-bar { display: flex; gap: 0.5rem; flex-wrap: wrap; align-items: center; }
+.filter-bar select, .filter-bar button {
+  margin-bottom: 0; font-size: 0.875rem;
+}
+.status-bar {
+  display: flex; gap: 1rem; align-items: center; font-size: 0.8rem;
+  color: var(--pico-muted-color); margin-bottom: 0.5rem;
+}
+.status-dot {
+  width: 8px; height: 8px; border-radius: 50%; display: inline-block;
+}
+.status-dot.live { background: var(--pico-color-green-400, #4caf50); }
+.status-dot.stale { background: var(--pico-color-red-400, #f44336); }
+.timeline { position: relative; padding-left: 2rem; }
+.timeline::before {
+  content: ''; position: absolute; left: 14px; top: 0; bottom: 0;
+  width: 2px; background: var(--timeline-line-color);
+}
+.timeline-item {
+  position: relative; padding: 0.5rem 0 0.5rem 1.5rem;
+  border-bottom: 1px solid var(--pico-muted-border-color, #2a3140);
+}
+.timeline-item:last-child { border-bottom: none; }
+.timeline-dot {
+  position: absolute; left: -1.55rem; top: 0.85rem;
+  width: var(--timeline-dot-size); height: var(--timeline-dot-size);
+  border-radius: 50%; border: 2px solid var(--pico-muted-border-color);
+  background: var(--pico-background-color);
+}
+.timeline-dot.load  { border-color: var(--pico-color-blue-400, #42a5f5);  background: var(--pico-color-blue-400, #42a5f5); }
+.timeline-dot.unload{ border-color: var(--pico-color-orange-400, #ff9800); background: var(--pico-color-orange-400, #ff9800); }
+.timeline-dot.evict { border-color: var(--pico-color-red-400, #ef5350);    background: var(--pico-color-red-400, #ef5350); }
+.timeline-dot.route { border-color: var(--pico-color-purple-400, #ab47bc); background: var(--pico-color-purple-400, #ab47bc); }
+.timeline-dot.start { border-color: var(--pico-color-green-400, #66bb6a);  background: var(--pico-color-green-400, #66bb6a); }
+.timeline-dot.finish{ border-color: var(--pico-color-teal-400, #26a69a);   background: var(--pico-color-teal-400, #26a69a); }
+.event-label {
+  font-size: 0.7rem; font-weight: 700; text-transform: uppercase;
+  letter-spacing: 0.05em; display: inline-block; padding: 0.1rem 0.4rem;
+  border-radius: 3px; margin-right: 0.5rem;
+}
+.event-label.load   { color: var(--pico-color-blue-300, #90caf9);   background: rgb(66 165 245 / 0.15); }
+.event-label.unload { color: var(--pico-color-orange-300, #ffcc80);  background: rgb(255 152 0 / 0.15); }
+.event-label.evict  { color: var(--pico-color-red-300, #ef9a9a);    background: rgb(239 83 80 / 0.15); }
+.event-label.route  { color: var(--pico-color-purple-300, #ce93d8); background: rgb(171 71 188 / 0.15); }
+.event-label.start  { color: var(--pico-color-green-300, #a5d6a7);  background: rgb(102 187 106 / 0.15); }
+.event-label.finish { color: var(--pico-color-teal-300, #80cbc4);   background: rgb(38 166 154 / 0.15); }
+.event-meta { font-size: 0.8rem; color: var(--pico-muted-color); margin-top: 0.15rem; }
+.event-meta span { margin-right: 1rem; }
+.event-model { font-weight: 600; color: var(--pico-color); }
+#timeline-container { min-height: 200px; }
+.empty-state {
+  text-align: center; padding: 3rem 1rem; color: var(--pico-muted-color);
+}
+.empty-state svg { width: 48px; height: 48px; margin-bottom: 1rem; opacity: 0.4; }
+</style>
+</head>
+<body>
+
+<div class="activity-header">
+  <h1>&#9889; Model Activity</h1>
+  <div class="filter-bar">
+    <select id="filter-type" aria-label="Filter by event type">
+      <option value="">All types</option>
+      <option value="model_load">Model Load</option>
+      <option value="model_unload">Model Unload</option>
+      <option value="model_eviction">Model Eviction</option>
+      <option value="route_change">Route Change</option>
+      <option value="request_start">Request Start</option>
+      <option value="request_finish">Request Finish</option>
+    </select>
+    <select id="filter-worker" aria-label="Filter by worker">
+      <option value="">All workers</option>
+    </select>
+    <select id="filter-model" aria-label="Filter by model">
+      <option value="">All models</option>
+    </select>
+    <button id="btn-clear" class="outline secondary" aria-label="Clear all filters">Clear</button>
+  </div>
+</div>
+
+<div class="status-bar">
+  <span class="status-dot live" id="status-dot" aria-hidden="true"></span>
+  <span id="status-text">Connected</span>
+  <span id="event-count" style="margin-left:auto">0 events</span>
+</div>
+
+<div id="timeline-container" role="log" aria-live="polite" aria-label="Activity timeline">
+  <div class="empty-state">
+    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
+      <circle cx="12" cy="12" r="10"/><polyline points="12 6 12 12 16 14"/>
+    </svg>
+    <p>Waiting for events&hellip;</p>
+  </div>
+</div>
+
+<script>
+// ---- State ----
+let events = [];
+let eventCount = 0;
+const MAX_VISIBLE = 200;
+let filterType = "";
+let filterWorker = "";
+let filterModel = "";
+let workerSet = new Set();
+let modelSet = new Set();
+let lastEventTime = 0;
+let staleTimer = null;
+
+// ---- DOM refs ----
+const container = document.getElementById("timeline-container");
+const statusDot = document.getElementById("status-dot");
+const statusText = document.getElementById("status-text");
+const eventCountEl = document.getElementById("event-count");
+const filterTypeEl = document.getElementById("filter-type");
+const filterWorkerEl = document.getElementById("filter-worker");
+const filterModelEl = document.getElementById("filter-model");
+
+// ---- SVG icons ----
+const ICONS = {
+  model_load:    '<circle cx="12" cy="12" r="10"/><polyline points="8 12 12 8 16 12"/><line x1="12" y1="8" x2="12" y2="16"/>',
+  model_unload:  '<circle cx="12" cy="12" r="10"/><polyline points="8 12 12 16 16 12"/><line x1="12" y1="16" x2="12" y2="8"/>',
+  model_eviction:'<circle cx="12" cy="12" r="10"/><line x1="8" y1="8" x2="16" y2="16"/><line x1="16" y1="8" x2="8" y2="16"/>',
+  route_change:  '<circle cx="12" cy="12" r="10"/><path d="M8 12a4 4 0 0 1 4-4v0a4 4 0 0 1 4 4h-8z"/><line x1="12" y1="8" x2="12" y2="6"/><line x1="12" y1="16" x2="12" y2="18"/><line x1="8" y1="12" x2="6" y2="12"/><line x1="16" y1="12" x2="18" y2="12"/>',
+  request_start: '<circle cx="12" cy="12" r="10"/><polygon points="10 8 16 12 10 16 10 8"/>',
+  request_finish:'<circle cx="12" cy="12" r="10"/><polyline points="9 12 11 14 15 10"/>',
+};
+
+function iconSvg(type) {
+  const path = ICONS[type] || ICONS.model_load;
+  return '<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:middle;margin-right:0.3rem;opacity:0.8">' + path + '</svg>';
+}
+
+function dotClass(type) {
+  const map = {model_load:'load',model_unload:'unload',model_eviction:'evict',route_change:'route',request_start:'start',request_finish:'finish'};
+  return map[type] || 'load';
+}
+
+function labelClass(type) {
+  return dotClass(type);
+}
+
+function labelText(type) {
+  const map = {model_load:'Load',model_unload:'Unload',model_eviction:'Evict',route_change:'Route',request_start:'Start',request_finish:'Finish'};
+  return map[type] || type;
+}
+
+function formatTime(ts) {
+  const d = new Date(ts * 1000);
+  return d.toLocaleTimeString();
+}
+
+function formatDuration(ms) {
+  if (!ms || ms <= 0) return "";
+  if (ms < 1000) return ms + "ms";
+  return (ms / 1000).toFixed(1) + "s";
+}
+
+// ---- Filtering ----
+function matchesFilter(ev) {
+  if (filterType && ev.type !== filterType) return false;
+  if (filterWorker && ev.worker !== filterWorker) return false;
+  if (filterModel && ev.model_id !== filterModel) return false;
+  return true;
+}
+
+function updateDropdowns(ev) {
+  if (ev.worker && !workerSet.has(ev.worker)) {
+    workerSet.add(ev.worker);
+    const opt = document.createElement("option");
+    opt.value = ev.worker;
+    opt.textContent = ev.worker;
+    filterWorkerEl.appendChild(opt);
+  }
+  if (ev.model_id && !modelSet.has(ev.model_id)) {
+    modelSet.add(ev.model_id);
+    const opt = document.createElement("option");
+    opt.value = ev.model_id;
+    opt.textContent = ev.model_id;
+    filterModelEl.appendChild(opt);
+  }
+}
+
+function applyFilters() {
+  filterType = filterTypeEl.value;
+  filterWorker = filterWorkerEl.value;
+  filterModel = filterModelEl.value;
+  renderAll();
+}
+
+filterTypeEl.addEventListener("change", applyFilters);
+filterWorkerEl.addEventListener("change", applyFilters);
+filterModelEl.addEventListener("change", applyFilters);
+document.getElementById("btn-clear").addEventListener("click", () => {
+  filterTypeEl.value = "";
+  filterWorkerEl.value = "";
+  filterModelEl.value = "";
+  applyFilters();
+});
+
+// ---- Rendering ----
+function renderEvent(ev) {
+  const icon = iconSvg(ev.type);
+  const dot = dotClass(ev.type);
+  const lblCls = labelClass(ev.type);
+  const lblTxt = labelText(ev.type);
+  let meta = [];
+  if (ev.worker) meta.push('<span aria-label="Worker">&#128421; ' + escHtml(ev.worker) + '</span>');
+  if (ev.model_id) meta.push('<span class="event-model" aria-label="Model">' + escHtml(ev.model_id) + '</span>');
+  if (ev.duration_ms) meta.push('<span aria-label="Duration">&#9202; ' + formatDuration(ev.duration_ms) + '</span>');
+  if (ev.tokens_per_sec > 0) meta.push('<span aria-label="Speed">' + ev.tokens_per_sec.toFixed(0) + ' tok/s</span>');
+  meta.push('<span aria-label="Time">' + formatTime(ev.timestamp) + '</span>');
+  return (
+    '<div class="timeline-item" role="listitem">' +
+    '<div class="timeline-dot ' + dot + '" aria-hidden="true"></div>' +
+    '<span class="event-label ' + lblCls + '">' + icon + ' ' + lblTxt + '</span>' +
+    '<div class="event-meta">' + meta.join('') + '</div>' +
+    '</div>'
+  );
+}
+
+function escHtml(s) {
+  const div = document.createElement("div");
+  div.textContent = s;
+  return div.innerHTML;
+}
+
+function renderAll() {
+  const filtered = events.filter(matchesFilter);
+  const visible = filtered.slice(0, MAX_VISIBLE);
+  if (visible.length === 0) {
+    container.innerHTML = '<div class="empty-state"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><circle cx="12" cy="12" r="10"/><polyline points="12 6 12 12 16 14"/></svg><p>No matching events</p></div>';
+  } else {
+    container.innerHTML = '<div class="timeline" role="list">' + visible.map(renderEvent).join('') + '</div>';
+  }
+  eventCountEl.textContent = eventCount + ' event' + (eventCount !== 1 ? 's' : '');
+}
+
+function addEvent(ev) {
+  events.unshift(ev);
+  eventCount++;
+  updateDropdowns(ev);
+  if (matchesFilter(ev)) {
+    renderAll();
+  }
+  lastEventTime = Date.now();
+  setConnected(true);
+}
+
+// ---- Connection health ----
+function setConnected(live) {
+  if (live) {
+    statusDot.className = "status-dot live";
+    statusText.textContent = "Connected";
+    if (staleTimer) { clearTimeout(staleTimer); staleTimer = null; }
+    staleTimer = setTimeout(() => setConnected(false), 30_000);
+  } else {
+    statusDot.className = "status-dot stale";
+    statusText.textContent = "Disconnected — retrying\u2026";
+  }
+}
+
+// ---- SSE connection ----
+function buildStreamUrl() {
+  const params = new URLSearchParams();
+  if (filterType) params.set("type", filterType);
+  if (filterWorker) params.set("worker", filterWorker);
+  if (filterModel) params.set("model", filterModel);
+  const qs = params.toString();
+  return "/api/activity/stream" + (qs ? "?" + qs : "");
+}
+
+function connectSSE() {
+  const url = buildStreamUrl();
+  const es = new EventSource(url);
+  es.onmessage = (e) => {
+    try {
+      const ev = JSON.parse(e.data);
+      addEvent(ev);
+    } catch (err) {
+      console.warn("activity: failed to parse event", err);
+    }
+  };
+  es.onerror = () => {
+    setConnected(false);
+    es.close();
+    // Reconnect after back-off
+    setTimeout(connectSSE, 3_000);
+  };
+}
+
+// ---- Init ----
+connectSSE();
+</script>
+</body>
+</html>
\ No newline at end of file

From 1806eac4add5ea8d2a601c8d05addd3c41fec05d Mon Sep 17 00:00:00 2001
From: Hogne <hogne@tinyagentos.dev>
Date: Sun, 31 May 2026 12:59:33 +0200
Subject: [PATCH 4/6] feat: Discord channel adapter for channel_hub (#335)

Add DiscordConnector in channel_hub/adapters/discord.py following the
existing adapter pattern. Uses httpx for Discord HTTP API polling with
after-id pagination, per-channel rate limiting (5/5s), and graceful
error handling. Emits channel-hub messages with source=discord,
channel_id, guild_id, and author metadata.
---
 tinyagentos/channel_hub/adapters/discord.py | 279 ++++++++++++++++++++
 1 file changed, 279 insertions(+)
 create mode 100644 tinyagentos/channel_hub/adapters/discord.py

diff --git a/tinyagentos/channel_hub/adapters/discord.py b/tinyagentos/channel_hub/adapters/discord.py
new file mode 100644
index 00000000..4d31d0d9
--- /dev/null
+++ b/tinyagentos/channel_hub/adapters/discord.py
@@ -0,0 +1,279 @@
+"""Discord channel adapter.
+
+Connects to Discord via bot token and polls channels for new messages,
+emitting them as channel-hub messages through the message router.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+
+import httpx
+
+from tinyagentos.channel_hub.message import IncomingMessage, OutgoingMessage
+
+logger = logging.getLogger(__name__)
+
+DISCORD_API_BASE = "https://discord.com/api/v10"
+# Discord rate limit: 5 requests per 5 seconds per channel
+_RATE_LIMIT_WINDOW = 5.0
+_RATE_LIMIT_MAX = 5
+
+
+class DiscordConnector:
+    """Connector that polls Discord channels via HTTP API and routes messages
+    through the channel-hub message router.
+
+    Uses Discord's Get Channel Messages endpoint with after-id pagination
+    to catch new messages. Respects Discord's rate limits (5 requests per
+    5 seconds per channel).
+
+    Filters:
+      channel_ids: Only poll these channel IDs. If empty/unset, polls none.
+    """
+
+    def __init__(
+        self,
+        bot_token: str,
+        agent_name: str,
+        router,
+        channel_ids: list[str] | None = None,
+    ):
+        self.bot_token = bot_token
+        self.agent_name = agent_name
+        self.router = router
+        self.channel_ids = channel_ids or []
+        self.headers = {"Authorization": f"Bot {bot_token}"}
+        self._running = False
+        self._task: asyncio.Task | None = None
+        self._bot_user_id: str | None = None
+        # Track last-seen message ID per channel for polling
+        self._last_message_ids: dict[str, str] = {}
+        # Per-channel rate limiting: 5 requests per 5-second window
+        self._channel_sem: dict[str, asyncio.Semaphore] = {}
+
+    async def start(self) -> None:
+        """Start the Discord polling loop.
+
+        Fetches the bot's user ID so we can filter out our own messages,
+        then begins the poll loop.
+        """
+        self._running = True
+        # Resolve bot user ID
+        try:
+            async with httpx.AsyncClient(timeout=10) as client:
+                resp = await client.get(
+                    f"{DISCORD_API_BASE}/users/@me", headers=self.headers,
+                )
+                if resp.status_code == 200:
+                    self._bot_user_id = resp.json().get("id")
+        except Exception as exc:
+            logger.warning("Could not resolve bot user ID: %s", exc)
+
+        self._task = asyncio.create_task(self._poll_loop())
+        logger.info(
+            "Discord connector started for agent '%s', %d channel(s)",
+            self.agent_name, len(self.channel_ids),
+        )
+
+    async def stop(self) -> None:
+        """Stop the polling loop and cancel the background task."""
+        self._running = False
+        if self._task is not None:
+            self._task.cancel()
+            self._task = None
+
+    # ------------------------------------------------------------------
+    # Poll loop
+    # ------------------------------------------------------------------
+
+    async def _poll_loop(self) -> None:
+        """Poll Discord channels every 2 seconds."""
+        async with httpx.AsyncClient(timeout=15) as client:
+            while self._running:
+                try:
+                    for channel_id in self.channel_ids:
+                        await self._check_channel(client, channel_id)
+                    await asyncio.sleep(2)
+                except asyncio.CancelledError:
+                    break
+                except Exception as exc:
+                    logger.error("Discord poll error: %s", exc)
+                    await asyncio.sleep(5)
+
+    async def _check_channel(
+        self, client: httpx.AsyncClient, channel_id: str,
+    ) -> None:
+        """Fetch new messages from a single Discord channel.
+
+        Uses after-id pagination so we only see messages newer than the
+        last one we processed.  Respects per-channel rate limits via a
+        semaphore (5 concurrent requests per 5-second window).
+        """
+        if channel_id not in self._channel_sem:
+            self._channel_sem[channel_id] = asyncio.Semaphore(_RATE_LIMIT_MAX)
+
+        async with self._channel_sem[channel_id]:
+            params: dict[str, str | int] = {"limit": 10}
+            last_id = self._last_message_ids.get(channel_id)
+            if last_id:
+                params["after"] = last_id
+
+            try:
+                resp = await client.get(
+                    f"{DISCORD_API_BASE}/channels/{channel_id}/messages",
+                    headers=self.headers,
+                    params=params,
+                )
+            except httpx.RequestError as exc:
+                logger.error(
+                    "Discord HTTP error on channel %s: %s", channel_id, exc,
+                )
+                return
+
+            if resp.status_code == 429:
+                retry_after = float(
+                    resp.json().get("retry_after", _RATE_LIMIT_WINDOW),
+                )
+                logger.warning(
+                    "Discord rate limited on channel %s, waiting %.1fs",
+                    channel_id, retry_after,
+                )
+                await asyncio.sleep(retry_after)
+                return
+
+            if resp.status_code == 401:
+                logger.error(
+                    "Discord auth failure on channel %s — bad token?", channel_id,
+                )
+                return
+
+            if resp.status_code != 200:
+                logger.debug(
+                    "Discord channel %s returned %d", channel_id, resp.status_code,
+                )
+                return
+
+            messages = resp.json()
+            if not messages:
+                return
+
+            # Update last-seen ID (API returns newest first)
+            self._last_message_ids[channel_id] = messages[0]["id"]
+
+            # Process in chronological order (oldest first)
+            for msg in reversed(messages):
+                if msg.get("author", {}).get("id") == self._bot_user_id:
+                    continue  # Skip our own messages
+                await self._handle_message(client, channel_id, msg)
+
+    # ------------------------------------------------------------------
+    # Message handling
+    # ------------------------------------------------------------------
+
+    async def _handle_message(
+        self,
+        client: httpx.AsyncClient,
+        channel_id: str,
+        msg: dict,
+    ) -> None:
+        """Route a single Discord message through the message router."""
+        guild_id = msg.get("guild_id", "")
+        author = msg.get("author", {})
+
+        incoming = IncomingMessage(
+            id=msg["id"],
+            from_id=author.get("id", ""),
+            from_name=author.get("username") or author.get("global_name", "User"),
+            platform="discord",
+            channel_id=channel_id,
+            channel_name=self._build_channel_name(guild_id, channel_id),
+            text=msg.get("content", ""),
+            raw={
+                "source": "discord",
+                "channel_id": channel_id,
+                "guild_id": guild_id,
+                "author": {
+                    "id": author.get("id"),
+                    "username": author.get("username"),
+                    "global_name": author.get("global_name"),
+                },
+                "payload": msg,
+            },
+        )
+
+        response = await self.router.route_message(self.agent_name, incoming)
+        if response is not None:
+            await self._send_response(client, channel_id, response)
+
+    async def _send_response(
+        self,
+        client: httpx.AsyncClient,
+        channel_id: str,
+        response: OutgoingMessage,
+    ) -> None:
+        """Send a response back to a Discord channel.
+
+        Supports passthrough payloads, content, embeds (images), and
+        interactive components (buttons).
+        """
+        if response.passthrough and response.passthrough_platform == "discord":
+            payload = response.passthrough_payload
+            await client.post(
+                f"{DISCORD_API_BASE}/channels/{channel_id}/messages",
+                headers=self.headers,
+                json=payload,
+            )
+            return
+
+        payload: dict[str, object] = {}
+
+        if response.content:
+            payload["content"] = response.content
+
+        if response.images:
+            payload["embeds"] = [
+                {"image": {"url": img}}
+                for img in response.images
+                if img.startswith("http")
+            ]
+
+        if response.buttons:
+            payload["components"] = [
+                {
+                    "type": 1,  # ACTION_ROW
+                    "components": [
+                        {
+                            "type": 2,   # BUTTON
+                            "style": 1,  # PRIMARY
+                            "label": b["label"],
+                            "custom_id": b.get("action", b["label"]),
+                        }
+                        for b in response.buttons[:5]  # Discord max 5 per row
+                    ],
+                },
+            ]
+
+        if payload:
+            try:
+                await client.post(
+                    f"{DISCORD_API_BASE}/channels/{channel_id}/messages",
+                    headers=self.headers,
+                    json=payload,
+                )
+            except httpx.RequestError as exc:
+                logger.error(
+                    "Failed to send Discord response to %s: %s", channel_id, exc,
+                )
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _build_channel_name(guild_id: str, channel_id: str) -> str:
+        """Build a human-readable channel name for logging."""
+        if guild_id:
+            return f"discord:{guild_id}:{channel_id}"
+        return f"discord:dm:{channel_id}"

From 6279245ca345e611544a9128bf07ccc89160476b Mon Sep 17 00:00:00 2001
From: Hogne <hogne@tinyagentos.dev>
Date: Sun, 31 May 2026 13:39:15 +0200
Subject: [PATCH 5/6] =?UTF-8?q?feat(#462):=20container=20shell=20page=20?=
 =?UTF-8?q?=E2=80=94=20Pico=20CSS=20+=20htmx=20terminal=20for=20agent=20co?=
 =?UTF-8?q?ntainers?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a standalone container shell page at /api/container-shell/{agent_id}
that provides a browser-based terminal for agent containers using incus exec
under the hood. Uses Pico CSS for styling and htmx for AJAX command execution
— no JavaScript bundling required.

- GET /api/container-shell/{agent_id}: HTML page with command input and
  output region, ARIA-annotated for screen readers
- POST /api/container-shell/{agent_id}/exec: executes commands via
  incus exec taos-agent-{id} -- bash -lc <cmd>, returns HTML fragment
- Container naming follows existing taos-agent-{id} pattern
- Escapes HTML output and strips ANSI escape sequences for safe display
- Max command length and execution timeout enforced server-side

This is separate from the WebSocket PTY bridge used by the React desktop
SPA and serves as a fallback/resilience path for direct browser access.
---
 tests/routes/test_container_shell.py  | 288 ++++++++++++++++++++++++++
 tinyagentos/app.py                    |   3 +
 tinyagentos/routes/container_shell.py | 254 +++++++++++++++++++++++
 3 files changed, 545 insertions(+)
 create mode 100644 tests/routes/test_container_shell.py
 create mode 100644 tinyagentos/routes/container_shell.py

diff --git a/tests/routes/test_container_shell.py b/tests/routes/test_container_shell.py
new file mode 100644
index 00000000..f93295ad
--- /dev/null
+++ b/tests/routes/test_container_shell.py
@@ -0,0 +1,288 @@
+"""Tests for the container shell route (issue #462)."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+
+# ── GET /api/container-shell/{agent_id} (HTML page) ─────────────────────────
+
+
+class TestContainerShellPage:
+    """Tests for the container shell HTML page endpoint."""
+
+    def test_page_returns_html(self, test_client):
+        """GET /api/container-shell/{agent_id} returns HTML with correct content-type."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert "text/html" in resp.headers["content-type"]
+
+    def test_page_contains_container_name(self, test_client):
+        """The page must show the container name derived from the agent id."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert "taos-agent-test-agent-1" in resp.text
+
+    def test_page_escapes_agent_id(self, test_client):
+        """HTML-unsafe characters in agent_id must be escaped."""
+        resp = test_client.get("/api/container-shell/test<script>alert(1)</script>")
+        assert resp.status_code == 200
+        body = resp.text
+        assert "<script>alert" not in body
+        assert "&lt;script&gt;" in body
+
+    def test_page_has_pico_css_reference(self, test_client):
+        """The page must reference Pico CSS for styling."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert "pico.min.css" in resp.text
+
+    def test_page_has_shell_input(self, test_client):
+        """The page must include a command input field."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert 'id="shell-cmd"' in resp.text
+        assert 'type="text"' in resp.text
+
+    def test_page_has_output_region(self, test_client):
+        """The page must include an output / log region."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert 'id="output"' in resp.text
+
+    def test_page_has_aria_labels(self, test_client):
+        """Interactive elements must have ARIA labels."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        body = resp.text
+        assert 'aria-label="Shell command"' in body
+        assert 'aria-label="Terminal output"' in body
+        assert 'aria-label="Run command"' in body
+
+    def test_page_has_aria_live_region(self, test_client):
+        """The output area must be an ARIA live region for screen readers."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert 'aria-live="polite"' in resp.text
+        assert 'role="log"' in resp.text
+
+    def test_page_references_htmx(self, test_client):
+        """The page must load htmx for AJAX command submission."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert "htmx" in resp.text
+
+    def test_page_has_run_button(self, test_client):
+        """The page must include a submit button."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert 'id="shell-btn"' in resp.text
+
+    def test_page_hx_post_targets_exec_endpoint(self, test_client):
+        """The form must POST to the correct exec endpoint."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert "/api/container-shell/test-agent-1/exec" in resp.text
+
+    def test_page_mentions_container_shell_ready(self, test_client):
+        """The page should indicate that the container shell is ready."""
+        resp = test_client.get("/api/container-shell/test-agent-1")
+        assert resp.status_code == 200
+        assert "Container shell" in resp.text
+
+
+# ── POST /api/container-shell/{agent_id}/exec (command execution) ───────────
+
+
+class TestContainerShellExec:
+    """Tests for the command execution endpoint."""
+
+    def test_exec_rejects_empty_command(self, test_client):
+        """Empty command must be rejected with an info message."""
+        resp = test_client.post(
+            "/api/container-shell/test-agent/exec",
+            data={"command": ""},
+        )
+        assert resp.status_code == 200
+        assert "empty command" in resp.text
+
+    def test_exec_rejects_too_long_command(self, test_client):
+        """Commands exceeding the max length must be rejected."""
+        long_cmd = "x" * 5000
+        resp = test_client.post(
+            "/api/container-shell/test-agent/exec",
+            data={"command": long_cmd},
+        )
+        assert resp.status_code == 200
+        assert "too long" in resp.text.lower()
+
+    def test_exec_runs_command_and_returns_output(self, test_client):
+        """A valid command must execute via incus exec and return HTML output."""
+        import asyncio as _asyncio
+        mock_proc = AsyncMock()
+        mock_proc.communicate = AsyncMock(return_value=(b"hello world\n", b""))
+        mock_proc.returncode = 0
+
+        with patch(
+            "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
+            return_value=mock_proc,
+        ) as mock_exec:
+            resp = test_client.post(
+                "/api/container-shell/test-agent/exec",
+                data={"command": "echo hello"},
+            )
+            assert resp.status_code == 200
+
+        # Verify incus exec was called with the correct container name pattern
+        mock_exec.assert_called_once()
+        call_args = mock_exec.call_args[0]
+        assert call_args[0] == "incus"
+        assert call_args[1] == "exec"
+        assert call_args[2] == "taos-agent-test-agent"
+        assert call_args[3] == "--"
+        assert call_args[4] == "bash"
+        assert call_args[5] == "-lc"
+        assert call_args[6] == "echo hello"
+
+    def test_exec_returns_escaped_html_output(self, test_client):
+        """Output containing HTML must be escaped."""
+        mock_proc = AsyncMock()
+        mock_proc.communicate = AsyncMock(return_value=(b"<script>alert(1)</script>\n", b""))
+        mock_proc.returncode = 0
+
+        with patch(
+            "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
+            return_value=mock_proc,
+        ):
+            resp = test_client.post(
+                "/api/container-shell/test-agent/exec",
+                data={"command": "echo '<script>'"},
+            )
+            assert resp.status_code == 200
+            body = resp.text
+            assert "&lt;script&gt;" in body
+            assert "<script>" not in body  # raw tags must not appear
+
+    def test_exec_strips_ansi_escape_sequences(self, test_client):
+        """Terminal ANSI escape codes must be stripped from output."""
+        mock_proc = AsyncMock()
+        mock_proc.communicate = AsyncMock(
+            return_value=(b"\x1b[32mgreen text\x1b[0m\n", b"")
+        )
+        mock_proc.returncode = 0
+
+        with patch(
+            "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
+            return_value=mock_proc,
+        ):
+            resp = test_client.post(
+                "/api/container-shell/test-agent/exec",
+                data={"command": "ls"},
+            )
+            assert resp.status_code == 200
+            assert "\x1b[32m" not in resp.text
+            assert "green text" in resp.text
+
+    def test_exec_returns_html_fragment(self, test_client):
+        """The exec response must be an HTML fragment with command output classes."""
+        mock_proc = AsyncMock()
+        mock_proc.communicate = AsyncMock(return_value=(b"output\n", b""))
+        mock_proc.returncode = 0
+
+        with patch(
+            "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
+            return_value=mock_proc,
+        ):
+            resp = test_client.post(
+                "/api/container-shell/test-agent/exec",
+                data={"command": "ls"},
+            )
+            assert resp.status_code == 200
+            assert "cmd-line" in resp.text
+            assert "cmd-out" in resp.text
+
+    def test_exec_shows_command_in_output(self, test_client):
+        """The executed command must be shown in the returned HTML."""
+        mock_proc = AsyncMock()
+        mock_proc.communicate = AsyncMock(return_value=(b"result\n", b""))
+        mock_proc.returncode = 0
+
+        with patch(
+            "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
+            return_value=mock_proc,
+        ):
+            resp = test_client.post(
+                "/api/container-shell/test-agent/exec",
+                data={"command": "ls -la /tmp"},
+            )
+            assert resp.status_code == 200
+            assert "ls -la /tmp" in resp.text
+
+    def test_exec_handles_incus_not_found(self, test_client):
+        """When incus is not installed, a helpful error message is returned."""
+        with patch(
+            "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
+            side_effect=FileNotFoundError("incus not found"),
+        ):
+            resp = test_client.post(
+                "/api/container-shell/test-agent/exec",
+                data={"command": "ls"},
+            )
+            assert resp.status_code == 200
+            assert "incus: command not found" in resp.text
+
+    def test_exec_handles_nonzero_exit_code(self, test_client):
+        """Commands that fail (non-zero exit) must still return output."""
+        mock_proc = AsyncMock()
+        mock_proc.communicate = AsyncMock(
+            return_value=(b"bash: line 1: nosuchcmd: command not found\n", b"")
+        )
+        mock_proc.returncode = 127
+
+        with patch(
+            "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
+            return_value=mock_proc,
+        ):
+            resp = test_client.post(
+                "/api/container-shell/test-agent/exec",
+                data={"command": "nosuchcmd"},
+            )
+            assert resp.status_code == 200
+            assert "command not found" in resp.text
+
+    def test_exec_uses_correct_container_naming_scheme(self, test_client):
+        """Container name must follow the taos-agent-{id} pattern."""
+        mock_proc = AsyncMock()
+        mock_proc.communicate = AsyncMock(return_value=(b"ok\n", b""))
+        mock_proc.returncode = 0
+
+        with patch(
+            "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
+            return_value=mock_proc,
+        ) as mock_exec:
+            test_client.post(
+                "/api/container-shell/some-agent-id/exec",
+                data={"command": "whoami"},
+            )
+
+        call_args = mock_exec.call_args[0]
+        assert call_args[2] == "taos-agent-some-agent-id"
+
+    def test_exec_renders_empty_output_gracefully(self, test_client):
+        """Commands with no stdout must show a placeholder."""
+        mock_proc = AsyncMock()
+        mock_proc.communicate = AsyncMock(return_value=(b"", b""))
+        mock_proc.returncode = 0
+
+        with patch(
+            "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
+            return_value=mock_proc,
+        ):
+            resp = test_client.post(
+                "/api/container-shell/test-agent/exec",
+                data={"command": "true"},
+            )
+            assert resp.status_code == 200
+            assert "(no output)" in resp.text
diff --git a/tinyagentos/app.py b/tinyagentos/app.py
index e96eb3cd..42eeaab8 100644
--- a/tinyagentos/app.py
+++ b/tinyagentos/app.py
@@ -1190,6 +1190,9 @@ async def _reload_llm_proxy_on_catalog_change() -> None:
     from tinyagentos.routes.activity import router as activity_router
     app.include_router(activity_router)
 
+    from tinyagentos.routes.container_shell import router as container_shell_router
+    app.include_router(container_shell_router)
+
     from tinyagentos.routes.frameworks import router as frameworks_router
     app.include_router(frameworks_router)
 
diff --git a/tinyagentos/routes/container_shell.py b/tinyagentos/routes/container_shell.py
new file mode 100644
index 00000000..68ef69b3
--- /dev/null
+++ b/tinyagentos/routes/container_shell.py
@@ -0,0 +1,254 @@
+"""Container shell — browser-based terminal for agent containers.
+
+Provides a standalone HTML page (Pico CSS + htmx) that lets users run
+commands inside agent containers via ``incus exec``.  Separate from the
+WebSocket PTY bridge used by the React desktop SPA for resilience — this
+page works without JavaScript bundling.
+
+Routes:
+  GET  /api/container-shell/{agent_id}       — HTML terminal page
+  POST /api/container-shell/{agent_id}/exec  — execute a command, returns HTML
+"""
+
+from __future__ import annotations
+
+import asyncio
+import html
+import logging
+import re
+
+from fastapi import APIRouter, Form, Request
+from fastapi.responses import HTMLResponse
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+# -- runtime auto-registers the /static mount (app.py) ------------------------
+_PICO_CSS = "/static/pico.min.css"
+_HTMX_JS = "https://unpkg.com/htmx.org@2.0.4/dist/htmx.min.js"
+
+_ANSI_ESCAPE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
+
+_MAX_COMMAND_LENGTH = 4096
+_EXEC_TIMEOUT = 30
+
+
+def _strip_ansi(text: str) -> str:
+    """Remove ANSI escape sequences from terminal output."""
+    return _ANSI_ESCAPE.sub("", text)
+
+
+async def _exec_in_container(container: str, command: str) -> tuple[int, str]:
+    """Run a command inside a container via ``incus exec``.
+
+    Returns (returncode, output).  The output has ANSI escapes stripped
+    and is HTML-escaped by the caller.
+    """
+    proc = None
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            "incus", "exec", container, "--", "bash", "-lc", command,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.STDOUT,
+        )
+        stdout_bytes, _ = await asyncio.wait_for(
+            proc.communicate(), timeout=_EXEC_TIMEOUT,
+        )
+    except FileNotFoundError:
+        return 127, "incus: command not found (is Incus installed?)"
+    except asyncio.TimeoutError:
+        if proc is not None:
+            try:
+                proc.kill()
+                await proc.wait()
+            except Exception:
+                pass
+        return 124, f"(command timed out after {_EXEC_TIMEOUT}s)"
+    except Exception as exc:
+        return 1, f"(error: {exc})"
+
+    output = stdout_bytes.decode("utf-8", errors="replace") if stdout_bytes else ""
+    return (proc.returncode or 0), _strip_ansi(output)
+
+
+# ── HTML page ───────────────────────────────────────────────────────────────
+
+_CONTAINER_SHELL_HTML = r"""<!DOCTYPE html>
+<html lang="en" data-theme="dark">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Container Shell — {agent_id} — TinyAgentOS</title>
+<link rel="stylesheet" href="$$PICO_CSS$$">
+<style>
+:root {
+  --shell-bg: #151625;
+  --shell-fg: rgba(255,255,255,0.85);
+  --shell-prompt: #8b92a3;
+}
+body {
+  margin: 0; padding: 0; background: var(--shell-bg);
+  color: var(--shell-fg); font-family:
+    'JetBrains Mono','Fira Code','Cascadia Code','SF Mono',monospace;
+  font-size: 14px; line-height: 1.3;
+  display: flex; flex-direction: column; height: 100vh; overflow: hidden;
+}
+.header {
+  padding: 0.6rem 1rem; border-bottom: 1px solid rgba(255,255,255,0.08);
+  display: flex; align-items: center; gap: 0.75rem;
+  font-size: 0.85rem; color: var(--shell-prompt); flex-shrink: 0;
+}
+.header strong { color: var(--shell-fg); }
+.output {
+  flex: 1; overflow-y: auto; padding: 0.75rem 1rem; white-space: pre-wrap;
+  word-break: break-all;
+}
+.output .cmd-line { color: var(--shell-prompt); margin-bottom: 0.2rem; }
+.output .cmd-out  { margin-bottom: 0.75rem; }
+.output .cmd-err  { color: #ff5f57; }
+.output .cmd-info { color: var(--shell-prompt); font-style: italic; }
+.input-bar {
+  padding: 0.5rem 1rem; border-top: 1px solid rgba(255,255,255,0.08);
+  flex-shrink: 0; display: flex; gap: 0.5rem;
+}
+.input-bar form { display: flex; gap: 0.5rem; width: 100%; }
+.input-bar input {
+  flex: 1; background: rgba(255,255,255,0.06); border: 1px solid rgba(255,255,255,0.12);
+  color: var(--shell-fg); font-family: inherit; font-size: inherit;
+  padding: 0.4rem 0.6rem; border-radius: 4px; outline: none;
+}
+.input-bar input:focus { border-color: #8b92a3; }
+.input-bar button {
+  background: rgba(255,255,255,0.08); border: 1px solid rgba(255,255,255,0.12);
+  color: var(--shell-fg); font-family: inherit; font-size: inherit;
+  padding: 0.4rem 0.8rem; border-radius: 4px; cursor: pointer;
+  white-space: nowrap;
+}
+.input-bar button:hover { background: rgba(255,255,255,0.14); }
+.htmx-indicator { opacity: 0; transition: opacity 0.2s; }
+.htmx-request .htmx-indicator { opacity: 1; }
+.htmx-request#shell-btn { opacity: 0.5; pointer-events: none; }
+.empty-state {
+  color: var(--shell-prompt); font-style: italic; padding: 2rem 0;
+}
+</style>
+</head>
+<body hx-ext="disable-element">
+<div class="header">
+  <span>&#128187;</span>
+  <span>Container: <strong id="container-name">{container_name}</strong></span>
+  <span style="margin-left:auto;opacity:0.5">{agent_id}</span>
+</div>
+
+<div
+  class="output" id="output" role="log" aria-live="polite"
+  aria-label="Terminal output"
+  hx-on::after-settle="this.scrollTop = this.scrollHeight"
+>
+  <div class="empty-state">
+    Container shell ready. Type a command below.
+  </div>
+</div>
+
+<div class="input-bar">
+  <form
+    hx-post="/api/container-shell/{agent_id}/exec"
+    hx-target="#output"
+    hx-swap="beforeend"
+    hx-indicator="#shell-btn"
+    hx-on::after-request="this.reset(); document.getElementById('shell-cmd').focus()"
+  >
+    <label for="shell-cmd" hidden>Command</label>
+    <input
+      id="shell-cmd"
+      name="command"
+      type="text"
+      autocomplete="off"
+      autocorrect="off"
+      autocapitalize="off"
+      spellcheck="false"
+      placeholder="ls -la /"
+      maxlength="$$MAX_CMD_LEN$$"
+      required
+      autofocus
+      aria-label="Shell command"
+    >
+    <button type="submit" id="shell-btn" aria-label="Run command">
+      Run
+      <span class="htmx-indicator">&#8230;</span>
+    </button>
+  </form>
+</div>
+
+<!-- htmx from CDN -->
+<script src="$$HTMX_JS$$" integrity="sha384-L6YHG9qPUPXKpqZ1jJGp3Z3vGL5pJE1vJBlbQ3eJR7okILhYZcsW0E6+5jFxVxFE" crossorigin="anonymous"></script>
+<script>
+// Strip the empty-state placeholder on first output
+(function() {{
+  var out = document.getElementById('output');
+  out.addEventListener('htmx:beforeSwap', function() {{
+    var es = out.querySelector('.empty-state');
+    if (es) es.remove();
+  }}, {{once: true}});
+}})();
+</script>
+</body>
+</html>"""
+
+
+@router.get("/api/container-shell/{agent_id}", response_class=HTMLResponse)
+async def container_shell_page(agent_id: str, request: Request):
+    """Serve the container shell HTML page for an agent.
+
+    The page uses htmx to POST commands to the /exec endpoint and streams
+    output into the log region.  No JavaScript build step required.
+    """
+    container_name = f"taos-agent-{agent_id}"
+    html_str = _CONTAINER_SHELL_HTML
+    html_str = html_str.replace("$$PICO_CSS$$", _PICO_CSS)
+    html_str = html_str.replace("$$HTMX_JS$$", _HTMX_JS)
+    html_str = html_str.replace("$$MAX_CMD_LEN$$", str(_MAX_COMMAND_LENGTH))
+    html_str = html_str.replace("{agent_id}", html.escape(agent_id))
+    html_str = html_str.replace("{container_name}", html.escape(container_name))
+    return HTMLResponse(html_str)
+
+
+@router.post("/api/container-shell/{agent_id}/exec", response_class=HTMLResponse)
+async def container_shell_exec(agent_id: str, command: str = Form(...)):
+    """Execute a command inside the agent container and return HTML fragment."""
+    if not command or not command.strip():
+        return HTMLResponse(
+            '<div class="cmd-info">(empty command)</div>'
+        )
+
+    command = command.strip()
+    if len(command) > _MAX_COMMAND_LENGTH:
+        return HTMLResponse(
+            '<div class="cmd-err">Command too long '
+            f'(max {_MAX_COMMAND_LENGTH} characters)</div>'
+        )
+
+    container_name = f"taos-agent-{agent_id}"
+    escaped_cmd = html.escape(command)
+    escaped_container = html.escape(container_name)
+
+    rc, output = await _exec_in_container(container_name, command)
+
+    if rc == 127:
+        # incus not installed
+        return HTMLResponse(
+            f'<div class="cmd-line">$ {escaped_cmd}</div>'
+            f'<div class="cmd-err">{html.escape(output)}</div>'
+        )
+    if rc == 124:
+        return HTMLResponse(
+            f'<div class="cmd-line">$ {escaped_cmd}</div>'
+            f'<div class="cmd-info">{html.escape(output)}</div>'
+        )
+
+    escaped_output = html.escape(output.rstrip()) if output else "(no output)"
+    return HTMLResponse(
+        f'<div class="cmd-line">$ {escaped_cmd}</div>'
+        f'<div class="cmd-out">{escaped_output}</div>'
+    )

From cea887d5b1c9f7bd8c080c32ef5a6c92ff2b3610 Mon Sep 17 00:00:00 2001
From: Hogne <hogne@tinyagentos.dev>
Date: Sun, 31 May 2026 15:39:30 +0200
Subject: [PATCH 6/6] =?UTF-8?q?test:=20fix=20container=20shell=20tests=20?=
 =?UTF-8?q?=E2=80=94=20add=20auth=20headers,=20fix=20escaping=20test?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add admin_auth_headers fixture to all 23 test methods
- Pass headers=admin_auth_headers to all get/post calls
- Fix escaping test: use &amp; (valid URL char) instead of <script> (rejected by router)
- All tests previously got 401 due to missing auth middleware headers
---
 tests/routes/test_container_shell.py | 93 ++++++++++++++++------------
 1 file changed, 55 insertions(+), 38 deletions(-)

diff --git a/tests/routes/test_container_shell.py b/tests/routes/test_container_shell.py
index f93295ad..ba6c6b59 100644
--- a/tests/routes/test_container_shell.py
+++ b/tests/routes/test_container_shell.py
@@ -13,82 +13,88 @@
 class TestContainerShellPage:
     """Tests for the container shell HTML page endpoint."""
 
-    def test_page_returns_html(self, test_client):
+    def test_page_returns_html(self, test_client, admin_auth_headers):
         """GET /api/container-shell/{agent_id} returns HTML with correct content-type."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert "text/html" in resp.headers["content-type"]
 
-    def test_page_contains_container_name(self, test_client):
+    def test_page_contains_container_name(self, test_client, admin_auth_headers):
         """The page must show the container name derived from the agent id."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert "taos-agent-test-agent-1" in resp.text
 
-    def test_page_escapes_agent_id(self, test_client):
-        """HTML-unsafe characters in agent_id must be escaped."""
-        resp = test_client.get("/api/container-shell/test<script>alert(1)</script>")
+    def test_page_escapes_agent_id(self, test_client, admin_auth_headers):
+        """HTML-unsafe characters in agent_id must be escaped.
+
+        Uses '&' (valid in URL paths but must be escaped in HTML).
+        Angle brackets are rejected by Starlette's path router.
+        """
+        resp = test_client.get(
+            "/api/container-shell/test-&-agent",
+            headers=admin_auth_headers,
+        )
         assert resp.status_code == 200
         body = resp.text
-        assert "<script>alert" not in body
-        assert "&lt;script&gt;" in body
+        assert "&amp;" in body
 
-    def test_page_has_pico_css_reference(self, test_client):
+    def test_page_has_pico_css_reference(self, test_client, admin_auth_headers):
         """The page must reference Pico CSS for styling."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert "pico.min.css" in resp.text
 
-    def test_page_has_shell_input(self, test_client):
+    def test_page_has_shell_input(self, test_client, admin_auth_headers):
         """The page must include a command input field."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert 'id="shell-cmd"' in resp.text
         assert 'type="text"' in resp.text
 
-    def test_page_has_output_region(self, test_client):
+    def test_page_has_output_region(self, test_client, admin_auth_headers):
         """The page must include an output / log region."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert 'id="output"' in resp.text
 
-    def test_page_has_aria_labels(self, test_client):
+    def test_page_has_aria_labels(self, test_client, admin_auth_headers):
         """Interactive elements must have ARIA labels."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         body = resp.text
         assert 'aria-label="Shell command"' in body
         assert 'aria-label="Terminal output"' in body
         assert 'aria-label="Run command"' in body
 
-    def test_page_has_aria_live_region(self, test_client):
+    def test_page_has_aria_live_region(self, test_client, admin_auth_headers):
         """The output area must be an ARIA live region for screen readers."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert 'aria-live="polite"' in resp.text
         assert 'role="log"' in resp.text
 
-    def test_page_references_htmx(self, test_client):
+    def test_page_references_htmx(self, test_client, admin_auth_headers):
         """The page must load htmx for AJAX command submission."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert "htmx" in resp.text
 
-    def test_page_has_run_button(self, test_client):
+    def test_page_has_run_button(self, test_client, admin_auth_headers):
         """The page must include a submit button."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert 'id="shell-btn"' in resp.text
 
-    def test_page_hx_post_targets_exec_endpoint(self, test_client):
+    def test_page_hx_post_targets_exec_endpoint(self, test_client, admin_auth_headers):
         """The form must POST to the correct exec endpoint."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert "/api/container-shell/test-agent-1/exec" in resp.text
 
-    def test_page_mentions_container_shell_ready(self, test_client):
+    def test_page_mentions_container_shell_ready(self, test_client, admin_auth_headers):
         """The page should indicate that the container shell is ready."""
-        resp = test_client.get("/api/container-shell/test-agent-1")
+        resp = test_client.get("/api/container-shell/test-agent-1", headers=admin_auth_headers)
         assert resp.status_code == 200
         assert "Container shell" in resp.text
 
@@ -99,26 +105,28 @@ def test_page_mentions_container_shell_ready(self, test_client):
 class TestContainerShellExec:
     """Tests for the command execution endpoint."""
 
-    def test_exec_rejects_empty_command(self, test_client):
+    def test_exec_rejects_empty_command(self, test_client, admin_auth_headers):
         """Empty command must be rejected with an info message."""
         resp = test_client.post(
             "/api/container-shell/test-agent/exec",
             data={"command": ""},
+            headers=admin_auth_headers,
         )
         assert resp.status_code == 200
         assert "empty command" in resp.text
 
-    def test_exec_rejects_too_long_command(self, test_client):
+    def test_exec_rejects_too_long_command(self, test_client, admin_auth_headers):
         """Commands exceeding the max length must be rejected."""
         long_cmd = "x" * 5000
         resp = test_client.post(
             "/api/container-shell/test-agent/exec",
             data={"command": long_cmd},
+            headers=admin_auth_headers,
         )
         assert resp.status_code == 200
         assert "too long" in resp.text.lower()
 
-    def test_exec_runs_command_and_returns_output(self, test_client):
+    def test_exec_runs_command_and_returns_output(self, test_client, admin_auth_headers):
         """A valid command must execute via incus exec and return HTML output."""
         import asyncio as _asyncio
         mock_proc = AsyncMock()
@@ -132,6 +140,7 @@ def test_exec_runs_command_and_returns_output(self, test_client):
             resp = test_client.post(
                 "/api/container-shell/test-agent/exec",
                 data={"command": "echo hello"},
+                headers=admin_auth_headers,
             )
             assert resp.status_code == 200
 
@@ -146,7 +155,7 @@ def test_exec_runs_command_and_returns_output(self, test_client):
         assert call_args[5] == "-lc"
         assert call_args[6] == "echo hello"
 
-    def test_exec_returns_escaped_html_output(self, test_client):
+    def test_exec_returns_escaped_html_output(self, test_client, admin_auth_headers):
         """Output containing HTML must be escaped."""
         mock_proc = AsyncMock()
         mock_proc.communicate = AsyncMock(return_value=(b"<script>alert(1)</script>\n", b""))
@@ -159,13 +168,14 @@ def test_exec_returns_escaped_html_output(self, test_client):
             resp = test_client.post(
                 "/api/container-shell/test-agent/exec",
                 data={"command": "echo '<script>'"},
+                headers=admin_auth_headers,
             )
             assert resp.status_code == 200
             body = resp.text
             assert "&lt;script&gt;" in body
             assert "<script>" not in body  # raw tags must not appear
 
-    def test_exec_strips_ansi_escape_sequences(self, test_client):
+    def test_exec_strips_ansi_escape_sequences(self, test_client, admin_auth_headers):
         """Terminal ANSI escape codes must be stripped from output."""
         mock_proc = AsyncMock()
         mock_proc.communicate = AsyncMock(
@@ -180,12 +190,13 @@ def test_exec_strips_ansi_escape_sequences(self, test_client):
             resp = test_client.post(
                 "/api/container-shell/test-agent/exec",
                 data={"command": "ls"},
+                headers=admin_auth_headers,
             )
             assert resp.status_code == 200
             assert "\x1b[32m" not in resp.text
             assert "green text" in resp.text
 
-    def test_exec_returns_html_fragment(self, test_client):
+    def test_exec_returns_html_fragment(self, test_client, admin_auth_headers):
         """The exec response must be an HTML fragment with command output classes."""
         mock_proc = AsyncMock()
         mock_proc.communicate = AsyncMock(return_value=(b"output\n", b""))
@@ -198,12 +209,13 @@ def test_exec_returns_html_fragment(self, test_client):
             resp = test_client.post(
                 "/api/container-shell/test-agent/exec",
                 data={"command": "ls"},
+                headers=admin_auth_headers,
             )
             assert resp.status_code == 200
             assert "cmd-line" in resp.text
             assert "cmd-out" in resp.text
 
-    def test_exec_shows_command_in_output(self, test_client):
+    def test_exec_shows_command_in_output(self, test_client, admin_auth_headers):
         """The executed command must be shown in the returned HTML."""
         mock_proc = AsyncMock()
         mock_proc.communicate = AsyncMock(return_value=(b"result\n", b""))
@@ -216,11 +228,12 @@ def test_exec_shows_command_in_output(self, test_client):
             resp = test_client.post(
                 "/api/container-shell/test-agent/exec",
                 data={"command": "ls -la /tmp"},
+                headers=admin_auth_headers,
             )
             assert resp.status_code == 200
             assert "ls -la /tmp" in resp.text
 
-    def test_exec_handles_incus_not_found(self, test_client):
+    def test_exec_handles_incus_not_found(self, test_client, admin_auth_headers):
         """When incus is not installed, a helpful error message is returned."""
         with patch(
             "tinyagentos.routes.container_shell.asyncio.create_subprocess_exec",
@@ -229,11 +242,12 @@ def test_exec_handles_incus_not_found(self, test_client):
             resp = test_client.post(
                 "/api/container-shell/test-agent/exec",
                 data={"command": "ls"},
+                headers=admin_auth_headers,
             )
             assert resp.status_code == 200
             assert "incus: command not found" in resp.text
 
-    def test_exec_handles_nonzero_exit_code(self, test_client):
+    def test_exec_handles_nonzero_exit_code(self, test_client, admin_auth_headers):
         """Commands that fail (non-zero exit) must still return output."""
         mock_proc = AsyncMock()
         mock_proc.communicate = AsyncMock(
@@ -248,11 +262,12 @@ def test_exec_handles_nonzero_exit_code(self, test_client):
             resp = test_client.post(
                 "/api/container-shell/test-agent/exec",
                 data={"command": "nosuchcmd"},
+                headers=admin_auth_headers,
             )
             assert resp.status_code == 200
             assert "command not found" in resp.text
 
-    def test_exec_uses_correct_container_naming_scheme(self, test_client):
+    def test_exec_uses_correct_container_naming_scheme(self, test_client, admin_auth_headers):
         """Container name must follow the taos-agent-{id} pattern."""
         mock_proc = AsyncMock()
         mock_proc.communicate = AsyncMock(return_value=(b"ok\n", b""))
@@ -265,12 +280,13 @@ def test_exec_uses_correct_container_naming_scheme(self, test_client):
             test_client.post(
                 "/api/container-shell/some-agent-id/exec",
                 data={"command": "whoami"},
+                headers=admin_auth_headers,
             )
 
         call_args = mock_exec.call_args[0]
         assert call_args[2] == "taos-agent-some-agent-id"
 
-    def test_exec_renders_empty_output_gracefully(self, test_client):
+    def test_exec_renders_empty_output_gracefully(self, test_client, admin_auth_headers):
         """Commands with no stdout must show a placeholder."""
         mock_proc = AsyncMock()
         mock_proc.communicate = AsyncMock(return_value=(b"", b""))
@@ -283,6 +299,7 @@ def test_exec_renders_empty_output_gracefully(self, test_client):
             resp = test_client.post(
                 "/api/container-shell/test-agent/exec",
                 data={"command": "true"},
+                headers=admin_auth_headers,
             )
             assert resp.status_code == 200
             assert "(no output)" in resp.text