From 62722de2850443641decadfa333a89dd98c529b9 Mon Sep 17 00:00:00 2001
From: Tomasz Iniewicz <itomek@users.noreply.github.com>
Date: Mon, 20 Apr 2026 18:50:45 -0400
Subject: [PATCH 1/7] fix(ui): honor custom agent model_id when session is at
 DB default (#841)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, _chat_helpers.py always passed model_id=<session model> explicitly
to registry.create_agent(). That defeated kwargs.setdefault("model_id", ...) in
custom agents, because setdefault only takes effect when the key is absent.

Fix: build create_kwargs conditionally, omitting model_id when the session is
at the DB default so the agent's __init__ setdefault governs. Also use
agent.model_id (post-construction) for both the _store_agent cache key and
the pre-flight _maybe_load_expected_model call.

Three-branch precedence: custom_model setting > session-explicit model >
omitted kwarg (the agent's own default applies).

Closes #841
---
 src/gaia/ui/_chat_helpers.py                  |  97 ++++--
 tests/integration/test_chat_ui_integration.py |  91 ++++++
 .../unit/chat/ui/test_agent_model_override.py |  58 ++++
 .../ui/test_chat_helpers_model_resolution.py  | 288 ++++++++++++++++++
 4 files changed, 511 insertions(+), 23 deletions(-)
 create mode 100644 tests/unit/chat/ui/test_agent_model_override.py
 create mode 100644 tests/unit/chat/ui/test_chat_helpers_model_resolution.py
diff --git a/src/gaia/ui/_chat_helpers.py b/src/gaia/ui/_chat_helpers.py
index cb23682c5..ad476e581 100644
--- a/src/gaia/ui/_chat_helpers.py
+++ b/src/gaia/ui/_chat_helpers.py
@@ -73,6 +73,11 @@ def get_agent_registry():
 _agent_cache_lock = threading.Lock()
 _MAX_CACHED_AGENTS = 10
 
+# Matches the fallback default in gaia.ui.database.create_session (~line 233).
+# Kept local to avoid widening _chat_helpers.py's coupling to database.py for
+# a cosmetic rename. If that value changes, update here too.
+_DB_DEFAULT_MODEL = "Qwen3.5-35B-A3B-GGUF"
+
 # Last known MCP runtime status — updated after each agent setup so
 # GET /api/mcp/status can return it without needing a running chat.
 _mcp_status_cache: list[dict] = []
@@ -554,19 +559,38 @@ def _do_chat():
                     agent_type,
                     session_id[:8],
                 )
-                agent = registry.create_agent(
-                    agent_type,
-                    model_id=model_id,
-                    silent_mode=True,
-                    debug=False,
-                )
+                create_kwargs: dict = {"silent_mode": True, "debug": False}
+                if custom_model:
+                    create_kwargs["model_id"] = custom_model
+                    logger.info(
+                        "create_agent: custom_model override -> %r", custom_model
+                    )
+                elif model_id and model_id != _DB_DEFAULT_MODEL:
+                    create_kwargs["model_id"] = model_id
+                    logger.info("create_agent: session-explicit model -> %r", model_id)
+                else:
+                    # Omit model_id so kwargs.setdefault in the agent's __init__ fires.
+                    # setdefault only works when the key is ABSENT — passing None or the
+                    # DB default explicitly defeats it. This is the fix for issue #841.
+                    logger.info(
+                        "create_agent: omitting model_id kwarg (session at DB default %r); "
+                        "agent's kwargs.setdefault or AgentConfig fallback will govern",
+                        _DB_DEFAULT_MODEL,
+                    )
+                agent = registry.create_agent(agent_type, **create_kwargs)
                 logger.info(
                     "chat: Invoking agent %s for session %s, model=%s",
                     agent_type,
                     session_id[:8],
-                    model_id,
+                    getattr(agent, "model_id", model_id),
+                )
+                _store_agent(
+                    session_id,
+                    getattr(agent, "model_id", None) or model_id,
+                    document_ids,
+                    agent,
+                    agent_type,
                 )
-                _store_agent(session_id, model_id, document_ids, agent, agent_type)
 
         # Restore conversation history (limited to prevent context overflow).
         # Always re-inject from DB so the history is consistent with what was
@@ -585,8 +609,11 @@ def _do_chat():
             agent.conversation_history.append({"role": "user", "content": u})
             agent.conversation_history.append({"role": "assistant", "content": a})
 
-        # Pre-flight: same fix as the streaming path — see _maybe_load_expected_model.
-        _maybe_load_expected_model(model_id)
+        # Pre-flight on agent's ACTUAL effective model. When model_id kwarg was
+        # omitted above, the agent's __init__ set model_id via kwargs.setdefault —
+        # a value invisible to us pre-construction. Using agent.model_id preserves
+        # the existing 100-900s silent-hang protection for all code paths.
+        _maybe_load_expected_model(getattr(agent, "model_id", None) or model_id)
 
         result = agent.process_query(request.message)
         if isinstance(result, dict):
@@ -913,19 +940,36 @@ def _run_agent():
                             session_id[:8],
                         )
                         t_construct = _time.monotonic()
-                        agent = registry.create_agent(
-                            agent_type,
-                            model_id=model_id,
-                            streaming=True,
-                            silent_mode=False,
-                            debug=False,
-                        )
+                        create_kwargs = {
+                            "streaming": True,
+                            "silent_mode": False,
+                            "debug": False,
+                        }
+                        if custom_model:
+                            create_kwargs["model_id"] = custom_model
+                            logger.info(
+                                "create_agent: custom_model override -> %r (streaming)",
+                                custom_model,
+                            )
+                        elif model_id and model_id != _DB_DEFAULT_MODEL:
+                            create_kwargs["model_id"] = model_id
+                            logger.info(
+                                "create_agent: session-explicit model -> %r (streaming)",
+                                model_id,
+                            )
+                        else:
+                            logger.info(
+                                "create_agent: omitting model_id kwarg (session at DB default %r); "
+                                "agent's kwargs.setdefault or AgentConfig fallback will govern (streaming)",
+                                _DB_DEFAULT_MODEL,
+                            )
+                        agent = registry.create_agent(agent_type, **create_kwargs)
                         agent.console = sse_handler
                         logger.info(
                             "chat: Invoking agent %s for session %s, model=%s took=%.3fs",
                             agent_type,
                             session_id[:8],
-                            model_id,
+                            getattr(agent, "model_id", model_id),
                             _time.monotonic() - t_construct,
                         )
 
@@ -937,7 +981,11 @@ def _run_agent():
                             _index_rag_with_progress(agent, rag_file_paths, sse_handler)
 
                         _store_agent(
-                            session_id, model_id, document_ids, agent, agent_type
+                            session_id,
+                            getattr(agent, "model_id", None) or model_id,
+                            document_ids,
+                            agent,
+                            agent_type,
                         )
 
                     sse_handler._emit(
@@ -987,10 +1035,13 @@ def _run_agent():
                 if sse_handler.cancelled.is_set():
                     return
 
-                # Pre-flight: ensure a chat-capable LLM is active before sending the query.
-                # Lemonade silently hangs when no model is loaded or the embedding model is
-                # active — no error is returned, so _execute_with_auto_download never fires.
-                _maybe_load_expected_model(model_id, sse_handler)
+                # Pre-flight on agent's ACTUAL effective model. When model_id kwarg was
+                # omitted, the agent's __init__ set model_id via kwargs.setdefault — a value
+                # invisible pre-construction. Using agent.model_id preserves the existing
+                # 100-900s silent-hang protection for all code paths including setdefault.
+                _maybe_load_expected_model(
+                    getattr(agent, "model_id", None) or model_id, sse_handler
+                )
 
                 # -- Phase 5: Query processing --
                 t_query = _time.monotonic()
diff --git a/tests/integration/test_chat_ui_integration.py b/tests/integration/test_chat_ui_integration.py
index ee6db321a..b092a9396 100644
--- a/tests/integration/test_chat_ui_integration.py
+++ b/tests/integration/test_chat_ui_integration.py
@@ -1591,3 +1591,94 @@ def test_delete_messages_from_session_not_found(self, client):
         """DELETE .../and-below returns 404 for non-existent session."""
         resp = client.delete("/api/sessions/nonexistent/messages/1/and-below")
         assert resp.status_code == 404
+
+
+# ── Issue #841 regression: custom agent model_id honored through API ──────────
+
+
+class TestCustomAgentModelChoice:
+    """Verify that a custom Python agent's kwargs.setdefault model_id reaches the
+    registry.create_agent call without model_id being passed as an explicit kwarg.
+
+    This is the integration-layer pin for issue #841. It exercises the full
+    path: HTTP POST → session → _get_chat_response → registry.create_agent.
+    """
+
+    def test_custom_agent_model_id_honored_through_api(self, tmp_path):
+        import textwrap
+
+        agents_dir = tmp_path / ".gaia" / "agents" / "smallbot"
+        agents_dir.mkdir(parents=True)
+        (agents_dir / "agent.py").write_text(textwrap.dedent("""
+            from gaia.agents.base.agent import Agent
+
+            class SmallBot(Agent):
+                AGENT_ID = "smallbot"
+                AGENT_NAME = "SmallBot"
+
+                def __init__(self, **kwargs):
+                    kwargs.setdefault("model_id", "Qwen3.5-4B-GGUF")
+                    super().__init__(skip_lemonade=True, **kwargs)
+
+                def _get_system_prompt(self):
+                    return "x"
+
+                def _register_tools(self):
+                    pass
+        """))
+
+        # HOME patch must wrap the full lifespan: discover() fires on __enter__.
+        with patch("gaia.agents.registry.Path.home", return_value=tmp_path):
+            app = create_app(db_path=":memory:")
+
+            with TestClient(app) as client:
+                # Spy on create_agent AFTER lifespan fires (registry exists now).
+                captured = {}
+                original_create = app.state.agent_registry.create_agent
+
+                def _spy(agent_id, **kwargs):
+                    if agent_id == "smallbot":
+                        captured["model_id_kwarg"] = kwargs.get("model_id", "<omitted>")
+                    agent = original_create(agent_id, **kwargs)
+                    if agent_id == "smallbot":
+                        captured["agent_model_id"] = getattr(agent, "model_id", None)
+                    return agent
+
+                app.state.agent_registry.create_agent = _spy
+
+                # Create a session typed to our custom agent.
+                sess_resp = client.post(
+                    "/api/sessions",
+                    json={"title": "841-test", "agent_type": "smallbot"},
+                )
+                assert sess_resp.status_code == 200, sess_resp.text
+                sid = sess_resp.json()["id"]
+
+                # Send a chat message, bypassing Lemonade and LLM.
+                with (
+                    patch("gaia.ui._chat_helpers._maybe_load_expected_model"),
+                    patch(
+                        "gaia.ui._chat_helpers._agent_registry",
+                        app.state.agent_registry,
+                    ),
+                ):
+                    chat_resp = client.post(
+                        "/api/chat/send",
+                        json={
+                            "session_id": sid,
+                            "message": "hi",
+                            "stream": False,
+                        },
+                    )
+
+                assert chat_resp.status_code == 200, chat_resp.text
+
+        assert captured, "create_agent spy was never called for smallbot"
+        assert captured["model_id_kwarg"] == "<omitted>", (
+            f"Issue #841: model_id kwarg must be omitted when session is at DB default; "
+            f"got model_id_kwarg={captured['model_id_kwarg']!r}"
+        )
+        assert captured["agent_model_id"] == "Qwen3.5-4B-GGUF", (
+            f"Issue #841: agent.model_id must reflect kwargs.setdefault value; "
+            f"got {captured['agent_model_id']!r}"
+        )
diff --git a/tests/unit/chat/ui/test_agent_model_override.py b/tests/unit/chat/ui/test_agent_model_override.py
new file mode 100644
index 000000000..19d116eb5
--- /dev/null
+++ b/tests/unit/chat/ui/test_agent_model_override.py
@@ -0,0 +1,58 @@
+# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+"""Regression test for issue #841: custom agent's model_id ignored by UI."""
+
+import textwrap
+from unittest.mock import patch
+
+from gaia.agents.registry import AgentRegistry
+
+
+def test_issue_841_custom_python_agent_model_id_respected(tmp_path):
+    """A custom Python agent using kwargs.setdefault in __init__ must be
+    instantiated with its own model_id when the UI omits the kwarg.
+
+    On pre-fix main, the UI always passes model_id=<session model> explicitly
+    to registry.create_agent — defeating kwargs.setdefault, which only fires
+    when the key is ABSENT. After T3 lands, the UI omits model_id when no
+    explicit user choice exists, so setdefault fires as the agent intends.
+
+    This test simulates the fixed UI call pattern: calling create_agent without
+    model_id, and asserting the agent's declared default is respected.
+    """
+    agents_dir = tmp_path / ".gaia" / "agents" / "foo"
+    agents_dir.mkdir(parents=True)
+    (agents_dir / "agent.py").write_text(textwrap.dedent("""
+        from gaia.agents.base.agent import Agent
+
+        class FooAgent(Agent):
+            AGENT_ID = "foo"
+            AGENT_NAME = "Foo"
+
+            def __init__(self, **kwargs):
+                kwargs.setdefault("model_id", "Qwen3.5-4B-GGUF")
+                super().__init__(skip_lemonade=True, **kwargs)
+
+            def _get_system_prompt(self):
+                return "foo"
+
+            def _register_tools(self):
+                pass
+    """))
+
+    with patch("gaia.agents.registry.Path.home", return_value=tmp_path):
+        registry = AgentRegistry()
+        registry.discover()
+
+    reg = registry.get("foo")
+    assert reg is not None, "custom agent should be discovered under patched HOME"
+
+    # Simulate what the fixed UI does when no explicit user choice exists:
+    # OMIT the model_id kwarg entirely so setdefault fires.
+    agent = registry.create_agent("foo", silent_mode=True, debug=False)
+
+    assert agent.model_id == "Qwen3.5-4B-GGUF", (
+        f"Issue #841: custom agent's kwargs.setdefault('model_id', ...) must "
+        f"govern when UI omits the kwarg; got {agent.model_id!r}"
+    )
diff --git a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py
new file mode 100644
index 000000000..fb402e409
--- /dev/null
+++ b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py
@@ -0,0 +1,288 @@
+# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+"""Unit tests for the model_id kwarg selection logic in _chat_helpers.py.
+
+Covers the three-branch precedence chain introduced by the #841 fix:
+  1. custom_model setting wins over everything
+  2. Session-explicit model (anything != DB default) is honored
+  3. model_id kwarg OMITTED when session is at the DB default, so that the
+     custom agent's kwargs.setdefault("model_id", ...) fires (the #841 fix)
+
+Also pins: streaming vs non-streaming silent_mode values, static source-grep
+guard against reintroduction of the antipattern, post-construction pre-flight
+contract, and built-in ChatAgent (agent_type="chat") behavior unchanged.
+"""
+
+import asyncio
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+# DB default must match the value used by gaia.ui.database.create_session
+_DB_DEFAULT = "Qwen3.5-35B-A3B-GGUF"
+
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+
+
+def _run_sync(coro):
+    return asyncio.get_event_loop().run_until_complete(coro)
+
+
+def _make_session(model=_DB_DEFAULT, agent_type="bot"):
+    return {
+        "document_ids": [],
+        "model": model,
+        "agent_type": agent_type,
+        "session_id": "sess-1",
+    }
+
+
+def _make_db(custom_model=None):
+    db = MagicMock()
+    db.get_messages.return_value = []
+    db.get_setting.return_value = custom_model
+    db.list_documents.return_value = []
+    db.update_session.return_value = None
+    db.get_session.return_value = {}
+    return db
+
+
+def _make_registry(resolve_model_return=None, setdefault_model="SetdefaultChose-GGUF"):
+    """Return (registry_mock, captured_dict).
+
+    captured["kwargs"] holds the kwargs received by create_agent.
+    The fake agent's model_id mimics kwargs.setdefault: if model_id was NOT
+    passed, it is set to setdefault_model; otherwise it keeps the passed value.
+    """
+    registry = MagicMock()
+    registry.get.return_value = True  # agent_type is registered
+    registry.resolve_model.return_value = resolve_model_return
+
+    captured = {}
+
+    def _spy(agent_id, **kwargs):
+        captured["kwargs"] = dict(kwargs)
+        fake = MagicMock()
+        fake.model_id = kwargs.get("model_id", setdefault_model)
+        fake.process_query.return_value = "ok"
+        fake.conversation_history = []
+        fake.indexed_files = set()
+        return fake
+
+    registry.create_agent.side_effect = _spy
+    return registry, captured
+
+
+def _call_non_streaming(session, db, agent_type_override=None, session_id="sess-1"):
+    import gaia.ui._chat_helpers as _helpers
+    from gaia.ui._chat_helpers import _get_chat_response
+    from gaia.ui.models import ChatRequest
+
+    # Clear the agent cache so tests don't interfere with each other.
+    with _helpers._agent_cache_lock:
+        _helpers._agent_cache.clear()
+
+    request = ChatRequest(
+        session_id=session_id,
+        message="hi",
+        stream=False,
+        agent_type=agent_type_override,
+    )
+    session = dict(session)
+    session.setdefault("session_id", session_id)
+    return _run_sync(_get_chat_response(db, session, request))
+
+
+# ── Tests ─────────────────────────────────────────────────────────────────────
+
+
+class TestModelKwargSelection:
+    """Verify the three-branch model_id selection at both call sites."""
+
+    def test_custom_model_setting_wins_over_everything(self):
+        """db.get_setting('custom_model') result always reaches create_agent as model_id."""
+        registry, captured = _make_registry(setdefault_model="AgentPref-GGUF")
+        db = _make_db(custom_model="UserPicked-GGUF")
+        session = _make_session(model=_DB_DEFAULT)
+
+        with (
+            patch("gaia.ui._chat_helpers._agent_registry", registry),
+            patch("gaia.ui._chat_helpers._maybe_load_expected_model"),
+        ):
+            _call_non_streaming(session, db)
+
+        assert captured["kwargs"].get("model_id") == "UserPicked-GGUF"
+
+    def test_session_explicit_model_honored(self):
+        """A session model that differs from the DB default is forwarded as model_id."""
+        registry, captured = _make_registry()
+        db = _make_db(custom_model=None)
+        session = _make_session(model="UserChose-GGUF")
+
+        with (
+            patch("gaia.ui._chat_helpers._agent_registry", registry),
+            patch("gaia.ui._chat_helpers._maybe_load_expected_model"),
+        ):
+            _call_non_streaming(session, db)
+
+        assert captured["kwargs"].get("model_id") == "UserChose-GGUF"
+
+    def test_model_id_kwarg_omitted_when_session_at_db_default(self):
+        """Core #841 fix: model_id kwarg must be ABSENT when session == DB default.
+
+        kwargs.setdefault only fires when the key is absent. The pre-fix code
+        always passes model_id=<session default> explicitly, defeating setdefault.
+        After the fix, model_id is omitted so the agent's __init__ governs.
+        """
+        registry, captured = _make_registry(setdefault_model="SetdefaultChose-GGUF")
+        db = _make_db(custom_model=None)
+        session = _make_session(model=_DB_DEFAULT)
+
+        with (
+            patch("gaia.ui._chat_helpers._agent_registry", registry),
+            patch("gaia.ui._chat_helpers._maybe_load_expected_model"),
+        ):
+            _call_non_streaming(session, db)
+
+        assert "model_id" not in captured.get("kwargs", {}), (
+            "Issue #841: model_id kwarg must be omitted when session is at DB default; "
+            f"got kwargs={captured.get('kwargs')}"
+        )
+        # The spy's setdefault model should be what the agent ends up with.
+        assert (
+            captured.get("kwargs", {}).get("model_id", "SetdefaultChose-GGUF")
+            == "SetdefaultChose-GGUF"
+        )
+
+    def test_model_id_kwarg_omitted_when_session_model_is_none(self):
+        """model_id kwarg is omitted when session model is None (unset session)."""
+        registry, captured = _make_registry(setdefault_model="SetdefaultChose-GGUF")
+        db = _make_db(custom_model=None)
+        session = _make_session(model=None)
+
+        with (
+            patch("gaia.ui._chat_helpers._agent_registry", registry),
+            patch("gaia.ui._chat_helpers._maybe_load_expected_model"),
+        ):
+            _call_non_streaming(session, db)
+
+        assert "model_id" not in captured.get("kwargs", {}), (
+            f"model_id kwarg must be omitted when session model is None; "
+            f"got kwargs={captured.get('kwargs')}"
+        )
+
+    def test_non_streaming_path_silent_mode_true_preserved(self):
+        """Non-streaming create_agent call must pass silent_mode=True."""
+        registry, captured = _make_registry()
+        db = _make_db(custom_model=None)
+        session = _make_session(model=_DB_DEFAULT)
+
+        with (
+            patch("gaia.ui._chat_helpers._agent_registry", registry),
+            patch("gaia.ui._chat_helpers._maybe_load_expected_model"),
+        ):
+            _call_non_streaming(session, db)
+
+        assert captured.get("kwargs", {}).get("silent_mode") is True, (
+            "Non-streaming path must pass silent_mode=True to create_agent; "
+            f"got kwargs={captured.get('kwargs')}"
+        )
+        assert "streaming" not in captured.get(
+            "kwargs", {}
+        ), "Non-streaming path must not pass streaming=True to create_agent"
+
+
+class TestStaticRegressionGuard:
+    """Source-level pin against reintroduction of the antipattern."""
+
+    def test_no_direct_model_id_kwarg_in_create_agent_calls(self):
+        """registry.create_agent must never be called with model_id=model_id directly.
+
+        The pre-fix antipattern was:
+            registry.create_agent(agent_type, model_id=model_id, ...)
+        which always passes the kwarg explicitly, defeating kwargs.setdefault.
+
+        ChatAgentConfig(model_id=model_id, ...) is legitimate and intentionally
+        excluded from this check — only create_agent calls are guarded.
+
+        This test catches future regressions at the source level in <5ms.
+        """
+        import re
+
+        src = (Path(__file__).parents[4] / "src/gaia/ui/_chat_helpers.py").read_text()
+        # Matches the old antipattern: create_agent(... model_id=model_id ...)
+        # Uses DOTALL so it catches multiline calls.
+        match = re.search(r"create_agent\([^)]*model_id=model_id", src, re.DOTALL)
+        assert not match, (
+            "Issue #841 regression: registry.create_agent must not receive "
+            "model_id=model_id as a direct kwarg. Build create_kwargs conditionally "
+            "and omit model_id when no explicit user choice exists.\n"
+            f"Match found at: {match.group()[:80]!r}"
+        )
+
+
+class TestPostConstructionPreflight:
+    """Verify pre-flight uses agent.model_id (not pre-call model_id variable)."""
+
+    def test_preflight_receives_agent_effective_model(self):
+        """_maybe_load_expected_model must be called with the agent's actual model_id.
+
+        When model_id kwarg is omitted, the agent's __init__ sets model_id via
+        setdefault AFTER construction. The pre-fix code called
+        _maybe_load_expected_model(model_id) with the pre-call variable (DB
+        default), missing the agent's actual effective model. The fix calls it
+        with agent.model_id so Lemonade pre-flight fires for the right model.
+        """
+        registry, captured = _make_registry(setdefault_model="SetdefaultChose-GGUF")
+        db = _make_db(custom_model=None)
+        session = _make_session(model=_DB_DEFAULT)
+
+        preflight_calls = []
+
+        def _spy_preflight(model_id, *args, **kwargs):
+            preflight_calls.append(model_id)
+
+        with (
+            patch("gaia.ui._chat_helpers._agent_registry", registry),
+            patch(
+                "gaia.ui._chat_helpers._maybe_load_expected_model",
+                side_effect=_spy_preflight,
+            ),
+        ):
+            _call_non_streaming(session, db)
+
+        assert preflight_calls, "_maybe_load_expected_model was never called"
+        # After the fix, pre-flight must use the agent's actual model_id
+        # ("SetdefaultChose-GGUF"), not the DB default it was seeded with.
+        assert preflight_calls[-1] == "SetdefaultChose-GGUF", (
+            f"Pre-flight must use agent.model_id after construction; "
+            f"got {preflight_calls[-1]!r} (expected 'SetdefaultChose-GGUF')"
+        )
+
+
+class TestBuiltinChatAgentUnchanged:
+    """Pin AC4: built-in ChatAgent (agent_type='chat') behavior is unchanged."""
+
+    def test_chat_agent_type_bypasses_registry(self):
+        """agent_type='chat' must not go through registry.create_agent."""
+        registry, captured = _make_registry()
+        db = _make_db(custom_model=None)
+        session = _make_session(model=_DB_DEFAULT, agent_type="chat")
+
+        fake_agent = MagicMock()
+        fake_agent.process_query.return_value = "ok"
+        fake_agent.conversation_history = []
+        fake_agent.indexed_files = set()
+        fake_agent.rag = None
+
+        with (
+            patch("gaia.ui._chat_helpers._agent_registry", registry),
+            patch("gaia.ui._chat_helpers._maybe_load_expected_model"),
+            patch("gaia.agents.chat.agent.ChatAgent", return_value=fake_agent),
+            patch("gaia.agents.chat.agent.ChatAgentConfig"),
+        ):
+            _call_non_streaming(session, db, agent_type_override=None)
+
+        # registry.create_agent must NOT have been called for the chat path
+        registry.create_agent.assert_not_called()

From 4acfd400bba16654eb6f24865a8ade31173e51d2 Mon Sep 17 00:00:00 2001
From: Tomasz Iniewicz <itomek@users.noreply.github.com>
Date: Mon, 20 Apr 2026 19:10:16 -0400
Subject: [PATCH 2/7] fix(ui): extract _build_create_kwargs/_effective_model,
 import SESSION_DEFAULT_MODEL

Addresses code review feedback on PR #842:

- Export SESSION_DEFAULT_MODEL from database.py (single source of truth)
  instead of duplicating the string literal in _chat_helpers.py
- Extract _build_create_kwargs() helper to eliminate the duplicate three-branch
  create_kwargs logic across non-streaming and streaming code paths
- Extract _effective_model() helper using explicit None check (not `or`)
  to safely read agent.model_id post-construction without treating empty
  string as missing
- Fix static regression guard regex to use [^()]* so nested helper calls
  inside create_agent() are not falsely flagged
- Update unit test to import SESSION_DEFAULT_MODEL instead of hardcoding
---
 src/gaia/ui/_chat_helpers.py                  | 131 ++++++++++--------
 src/gaia/ui/database.py                       |   6 +-
 .../ui/test_chat_helpers_model_resolution.py  |  11 +-
 3 files changed, 85 insertions(+), 63 deletions(-)

diff --git a/src/gaia/ui/_chat_helpers.py b/src/gaia/ui/_chat_helpers.py
index ad476e581..2aeae8d6f 100644
--- a/src/gaia/ui/_chat_helpers.py
+++ b/src/gaia/ui/_chat_helpers.py
@@ -23,7 +23,7 @@
 import time as _time
 from pathlib import Path
 
-from .database import ChatDatabase
+from .database import SESSION_DEFAULT_MODEL, ChatDatabase
 from .models import ChatRequest
 from .sse_handler import (
     _ANSWER_JSON_SUB_RE,
@@ -73,10 +73,8 @@ def get_agent_registry():
 _agent_cache_lock = threading.Lock()
 _MAX_CACHED_AGENTS = 10
 
-# Matches the fallback default in gaia.ui.database.create_session (~line 233).
-# Kept local to avoid widening _chat_helpers.py's coupling to database.py for
-# a cosmetic rename. If that value changes, update here too.
-_DB_DEFAULT_MODEL = "Qwen3.5-35B-A3B-GGUF"
+# Alias so call-sites read naturally; the canonical value lives in database.py.
+_DB_DEFAULT_MODEL = SESSION_DEFAULT_MODEL
 
 # Last known MCP runtime status — updated after each agent setup so
 # GET /api/mcp/status can return it without needing a running chat.
@@ -89,6 +87,56 @@ def get_agent_registry():
 model_load_lock = threading.Lock()
 
 
+def _build_create_kwargs(
+    *,
+    custom_model: str | None,
+    model_id: str | None,
+    streaming: bool = False,
+) -> dict:
+    """Return the kwargs dict for registry.create_agent().
+
+    Precedence (high → low):
+      1. custom_model setting (explicit user override from db)
+      2. session-explicit model (differs from SESSION_DEFAULT_MODEL)
+      3. omit model_id — lets the agent's kwargs.setdefault govern (fix #841)
+
+    Note: if registry.resolve_model() already promoted model_id before this
+    call, it is forwarded as-is via branch 2 (resolve_model result ≠ default).
+    """
+    suffix = " (streaming)" if streaming else ""
+    kwargs: dict = {"silent_mode": not streaming, "debug": False}
+    if streaming:
+        kwargs["streaming"] = True
+
+    if custom_model:
+        kwargs["model_id"] = custom_model
+        logger.info("create_agent: custom_model override -> %s%s", custom_model, suffix)
+    elif model_id and model_id != _DB_DEFAULT_MODEL:
+        kwargs["model_id"] = model_id
+        logger.info("create_agent: session-explicit model -> %s%s", model_id, suffix)
+    else:
+        # Omit model_id so kwargs.setdefault in the agent's __init__ fires.
+        # setdefault only works when the key is ABSENT. Passing the DB default
+        # (or None / empty) explicitly defeats it — this is the fix for #841.
+        logger.info(
+            "create_agent: omitting model_id kwarg (session at DB default %s); "
+            "agent's kwargs.setdefault or AgentConfig fallback will govern%s",
+            _DB_DEFAULT_MODEL,
+            suffix,
+        )
+    return kwargs
+
+
+def _effective_model(agent, fallback: str | None) -> str | None:
+    """Return agent.model_id if set, else fallback.
+
+    Uses explicit None check (not `or`) to avoid treating empty-string
+    model_id as missing — which would silently load the wrong model.
+    """
+    effective = getattr(agent, "model_id", None)
+    return effective if effective is not None else fallback
+
+
 def get_cached_mcp_status() -> list[dict]:
     """Return the last known MCP server connection status from any cached agent."""
     with _mcp_status_lock:
@@ -559,34 +607,21 @@ def _do_chat():
                     agent_type,
                     session_id[:8],
                 )
-                create_kwargs: dict = {"silent_mode": True, "debug": False}
-                if custom_model:
-                    create_kwargs["model_id"] = custom_model
-                    logger.info(
-                        "create_agent: custom_model override -> %r", custom_model
-                    )
-                elif model_id and model_id != _DB_DEFAULT_MODEL:
-                    create_kwargs["model_id"] = model_id
-                    logger.info("create_agent: session-explicit model -> %r", model_id)
-                else:
-                    # Omit model_id so kwargs.setdefault in the agent's __init__ fires.
-                    # setdefault only works when the key is ABSENT — passing None or the
-                    # DB default explicitly defeats it. This is the fix for issue #841.
-                    logger.info(
-                        "create_agent: omitting model_id kwarg (session at DB default %r); "
-                        "agent's kwargs.setdefault or AgentConfig fallback will govern",
-                        _DB_DEFAULT_MODEL,
-                    )
-                agent = registry.create_agent(agent_type, **create_kwargs)
+                agent = registry.create_agent(
+                    agent_type,
+                    **_build_create_kwargs(
+                        custom_model=custom_model, model_id=model_id
+                    ),
+                )
                 logger.info(
                     "chat: Invoking agent %s for session %s, model=%s",
                     agent_type,
                     session_id[:8],
-                    getattr(agent, "model_id", model_id),
+                    _effective_model(agent, model_id),
                 )
                 _store_agent(
                     session_id,
-                    getattr(agent, "model_id", None) or model_id,
+                    _effective_model(agent, model_id),
                     document_ids,
                     agent,
                     agent_type,
@@ -610,10 +645,10 @@ def _do_chat():
             agent.conversation_history.append({"role": "assistant", "content": a})
 
         # Pre-flight on agent's ACTUAL effective model. When model_id kwarg was
-        # omitted above, the agent's __init__ set model_id via kwargs.setdefault —
-        # a value invisible to us pre-construction. Using agent.model_id preserves
+        # omitted, the agent's __init__ set model_id via kwargs.setdefault —
+        # a value invisible pre-construction. Using _effective_model preserves
         # the existing 100-900s silent-hang protection for all code paths.
-        _maybe_load_expected_model(getattr(agent, "model_id", None) or model_id)
+        _maybe_load_expected_model(_effective_model(agent, model_id))
 
         result = agent.process_query(request.message)
         if isinstance(result, dict):
@@ -940,36 +975,20 @@ def _run_agent():
                             session_id[:8],
                         )
                         t_construct = _time.monotonic()
-                        create_kwargs = {
-                            "streaming": True,
-                            "silent_mode": False,
-                            "debug": False,
-                        }
-                        if custom_model:
-                            create_kwargs["model_id"] = custom_model
-                            logger.info(
-                                "create_agent: custom_model override -> %r (streaming)",
-                                custom_model,
-                            )
-                        elif model_id and model_id != _DB_DEFAULT_MODEL:
-                            create_kwargs["model_id"] = model_id
-                            logger.info(
-                                "create_agent: session-explicit model -> %r (streaming)",
-                                model_id,
-                            )
-                        else:
-                            logger.info(
-                                "create_agent: omitting model_id kwarg (session at DB default %r); "
-                                "agent's kwargs.setdefault or AgentConfig fallback will govern (streaming)",
-                                _DB_DEFAULT_MODEL,
-                            )
-                        agent = registry.create_agent(agent_type, **create_kwargs)
+                        agent = registry.create_agent(
+                            agent_type,
+                            **_build_create_kwargs(
+                                custom_model=custom_model,
+                                model_id=model_id,
+                                streaming=True,
+                            ),
+                        )
                         agent.console = sse_handler
                         logger.info(
                             "chat: Invoking agent %s for session %s, model=%s took=%.3fs",
                             agent_type,
                             session_id[:8],
-                            getattr(agent, "model_id", model_id),
+                            _effective_model(agent, model_id),
                             _time.monotonic() - t_construct,
                         )
 
@@ -982,7 +1001,7 @@ def _run_agent():
 
                         _store_agent(
                             session_id,
-                            getattr(agent, "model_id", None) or model_id,
+                            _effective_model(agent, model_id),
                             document_ids,
                             agent,
                             agent_type,
@@ -1040,7 +1059,7 @@ def _run_agent():
                 # invisible pre-construction. Using agent.model_id preserves the existing
                 # 100-900s silent-hang protection for all code paths including setdefault.
                 _maybe_load_expected_model(
-                    getattr(agent, "model_id", None) or model_id, sse_handler
+                    _effective_model(agent, model_id), sse_handler
                 )
 
                 # -- Phase 5: Query processing --
diff --git a/src/gaia/ui/database.py b/src/gaia/ui/database.py
index a0d037e92..7305f6e8f 100644
--- a/src/gaia/ui/database.py
+++ b/src/gaia/ui/database.py
@@ -20,6 +20,10 @@
 
 DEFAULT_DB_PATH = Path.home() / ".gaia" / "chat" / "gaia_chat.db"
 
+# Default model for new sessions — kept in sync with the SQL schema DEFAULT and
+# any code that reads session["model"] and falls back when the field is NULL.
+SESSION_DEFAULT_MODEL = "Qwen3.5-35B-A3B-GGUF"
+
 SCHEMA_SQL = """
 -- Global document library
 CREATE TABLE IF NOT EXISTS documents (
@@ -230,7 +234,7 @@ def create_session(
         """Create a new chat session."""
         session_id = str(uuid.uuid4())
         now = self._now()
-        model = model or "Qwen3.5-35B-A3B-GGUF"
+        model = model or SESSION_DEFAULT_MODEL
         title = title or "New Chat"
         agent_type = agent_type or "chat"
 
diff --git a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py
index fb402e409..19da2467a 100644
--- a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py
+++ b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py
@@ -18,9 +18,7 @@
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
-# DB default must match the value used by gaia.ui.database.create_session
-_DB_DEFAULT = "Qwen3.5-35B-A3B-GGUF"
-
+from gaia.ui.database import SESSION_DEFAULT_MODEL as _DB_DEFAULT
 
 # ── Helpers ──────────────────────────────────────────────────────────────────
 
@@ -211,9 +209,10 @@ def test_no_direct_model_id_kwarg_in_create_agent_calls(self):
         import re
 
         src = (Path(__file__).parents[4] / "src/gaia/ui/_chat_helpers.py").read_text()
-        # Matches the old antipattern: create_agent(... model_id=model_id ...)
-        # Uses DOTALL so it catches multiline calls.
-        match = re.search(r"create_agent\([^)]*model_id=model_id", src, re.DOTALL)
+        # Matches the old antipattern: create_agent(... model_id=model_id ...) as a
+        # DIRECT kwarg (not inside a nested call like _build_create_kwargs).
+        # [^()]* stops at any parenthesis so nested helper calls aren't matched.
+        match = re.search(r"create_agent\([^()]*model_id=model_id", src, re.DOTALL)
         assert not match, (
             "Issue #841 regression: registry.create_agent must not receive "
             "model_id=model_id as a direct kwarg. Build create_kwargs conditionally "

From 8f5c7621ca3a1aeb79e046fafd7754ca4929e56d Mon Sep 17 00:00:00 2001
From: Tomasz Iniewicz <tomasz.iniewicz@amd.com>
Date: Wed, 22 Apr 2026 05:14:20 -0400
Subject: [PATCH 3/7] fix(ui): restore intent-key for agent cache store to fix
 miss regression (#842)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_store_agent was changed by the #842 fix to use _effective_model(agent,
model_id) as the cache key — the post-construction value set by kwargs.setdefault.
_get_cached_agent still looks up using the pre-construction model_id variable.
For custom agents whose setdefault model differs from the session model, the
keys never match and the agent is rebuilt on every turn.

Revert the two _store_agent call sites to use model_id (the pre-construction
intent key), matching what the lookup uses. _effective_model stays at the two
_maybe_load_expected_model sites (Lemonade pre-flight needs the actual model)
and in log statements (observability).

Add two regression guards:
- test_cache_hit_on_second_turn_for_setdefault_agent: two-turn cache-hit test
  with four assertions (call count, object identity, stored-key equality,
  agent.model_id). Covers the builder/template.py setdefault pattern.
- test_no_effective_model_in_store_agent_calls: static grep guard that asserts
  _store_agent never receives _effective_model(...) as a positional arg,
  preventing this pattern from silently returning in a future cleanup pass.
---
 src/gaia/ui/_chat_helpers.py                  |  4 +-
 .../ui/test_chat_helpers_model_resolution.py  | 80 +++++++++++++++++++
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/src/gaia/ui/_chat_helpers.py b/src/gaia/ui/_chat_helpers.py
index 2aeae8d6f..912a139e8 100644
--- a/src/gaia/ui/_chat_helpers.py
+++ b/src/gaia/ui/_chat_helpers.py
@@ -621,7 +621,7 @@ def _do_chat():
                 )
                 _store_agent(
                     session_id,
-                    _effective_model(agent, model_id),
+                    model_id,
                     document_ids,
                     agent,
                     agent_type,
@@ -1001,7 +1001,7 @@ def _run_agent():
 
                         _store_agent(
                             session_id,
-                            _effective_model(agent, model_id),
+                            model_id,
                             document_ids,
                             agent,
                             agent_type,
diff --git a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py
index 19da2467a..775af47ef 100644
--- a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py
+++ b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py
@@ -190,6 +190,69 @@ def test_non_streaming_path_silent_mode_true_preserved(self):
             "kwargs", {}
         ), "Non-streaming path must not pass streaming=True to create_agent"
 
+    def test_cache_hit_on_second_turn_for_setdefault_agent(self):
+        """Cache regression guard for #842 fix: custom agents must hit the cache
+        on turn 2 even when their setdefault model differs from the session model.
+
+        Pre-fix _store_agent used _effective_model(agent, model_id) (the
+        post-construction value, e.g. "SetdefaultChose-GGUF") as the cache key,
+        while _get_cached_agent looked up using the pre-construction model_id
+        (the DB default). The keys never matched → cache miss every turn.
+
+        After the fix, _store_agent uses model_id (pre-construction intent)
+        and the keys agree regardless of what setdefault chose.
+        """
+        import gaia.ui._chat_helpers as _helpers
+        from gaia.ui._chat_helpers import _get_chat_response
+        from gaia.ui.models import ChatRequest
+
+        sid = "cache-test-session"
+        registry, _ = _make_registry(setdefault_model="SetdefaultChose-GGUF")
+        db = _make_db(custom_model=None)
+        session = dict(_make_session(model=_DB_DEFAULT))
+        session["session_id"] = sid
+
+        # Clear cache once; do NOT clear between turns (that's the whole point).
+        with _helpers._agent_cache_lock:
+            _helpers._agent_cache.clear()
+
+        request = ChatRequest(session_id=sid, message="hi", stream=False)
+
+        with (
+            patch("gaia.ui._chat_helpers._agent_registry", registry),
+            patch("gaia.ui._chat_helpers._maybe_load_expected_model"),
+        ):
+            # Turn 1 — agent constructed, stored in cache.
+            _run_sync(_get_chat_response(db, session, request))
+            first_agent = _helpers._agent_cache.get(sid, {}).get("agent")
+
+            # Turn 2 — must hit the cache; no second create_agent call.
+            _run_sync(_get_chat_response(db, session, request))
+            second_agent = _helpers._agent_cache.get(sid, {}).get("agent")
+
+        # 1. Only one construction (cache hit on turn 2).
+        assert registry.create_agent.call_count == 1, (
+            f"Cache regression: create_agent called {registry.create_agent.call_count} "
+            "times; expected 1 (turn 2 must be a cache hit, not a rebuild)"
+        )
+        # 2. Object identity proves the cache returned the same agent.
+        assert second_agent is first_agent, (
+            "Cache regression: turn 2 returned a different agent object — "
+            "cache hit must return the SAME instance, not a reconstructed one"
+        )
+        # 3. Stored key is the pre-construction intent (the actual regression pin).
+        stored_model = _helpers._agent_cache.get(sid, {}).get("model_id")
+        assert stored_model == _DB_DEFAULT, (
+            f"Cache regression: stored model_id={stored_model!r} must equal the "
+            f"pre-construction session model {_DB_DEFAULT!r}, not the agent's "
+            "post-setdefault value — otherwise lookup/store keys diverge"
+        )
+        # 4. Agent's own model_id reflects what setdefault chose.
+        assert first_agent.model_id == "SetdefaultChose-GGUF", (
+            f"Agent model_id={first_agent.model_id!r} must reflect kwargs.setdefault "
+            "value 'SetdefaultChose-GGUF'"
+        )
+
 
 class TestStaticRegressionGuard:
     """Source-level pin against reintroduction of the antipattern."""
@@ -220,6 +283,23 @@ def test_no_direct_model_id_kwarg_in_create_agent_calls(self):
             f"Match found at: {match.group()[:80]!r}"
         )
 
+    def test_no_effective_model_in_store_agent_calls(self):
+        """Cache-key divergence guard for the #842 fix. _store_agent is the cache
+        STORE; its 2nd arg must be the pre-construction model_id so it matches
+        the lookup key used by _get_cached_agent. Passing _effective_model(...)
+        (post-construction) causes the store/lookup keys to diverge whenever the
+        agent's setdefault differs from the session model — agents rebuild every turn.
+        """
+        import re
+
+        src = (Path(__file__).parents[4] / "src/gaia/ui/_chat_helpers.py").read_text()
+        match = re.search(r"_store_agent\([^()]*_effective_model", src, re.DOTALL)
+        assert not match, (
+            "Cache regression (#842): _store_agent must not receive _effective_model(...) "
+            "as a positional arg — store/lookup keys would diverge for setdefault agents. "
+            f"Match: {match.group()[:80]!r}"
+        )
+
 
 class TestPostConstructionPreflight:
     """Verify pre-flight uses agent.model_id (not pre-call model_id variable)."""

From a0fdb10943c1d179708846ad58704a95195793c3 Mon Sep 17 00:00:00 2001
From: Kalin Ovtcharov <kalin@extropolis.ai>
Date: Mon, 20 Apr 2026 14:42:27 -0700
Subject: [PATCH 4/7] docs(plans): fix broken CMU link to EMNLP 2004 Email
 Speech Acts paper (#817)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

One-line fix: swap the failing `www.cs.cmu.edu/~tom/EMNLP2004_final.pdf`
URL in `docs/plans/email-triage-agent.mdx:2601` for the canonical ACL
Anthology record at [W04-3240](https://aclanthology.org/W04-3240/). The
CMU URL fails DNS resolution in CI (see [recent
run](https://github.com/amd/gaia/actions/runs/24595902571/job/72072156929)),
breaking the ``Verify external URLs`` check for every open PR that
touches docs. ACL Anthology is the permanent archive for ACL/EMNLP
papers — stable URL, no more link rot.

Also restored the paper's actual full title ("Learning to Classify Email
into 'Speech Acts'") for consistency with the other full-title citations
in the same references list.

## Test plan

- [x] `curl -sI https://aclanthology.org/W04-3240/` returns 200
- [ ] After merge, `Verify external URLs` check should go green on
downstream PRs
---
 docs/plans/email-triage-agent.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/plans/email-triage-agent.mdx b/docs/plans/email-triage-agent.mdx
index f515efd0e..95d71cd6d 100644
--- a/docs/plans/email-triage-agent.mdx
+++ b/docs/plans/email-triage-agent.mdx
@@ -2598,7 +2598,7 @@ Choices the spec implies but does not resolve:
 - [Whittaker & Sidner, "Email Overload" (CHI 1996)](https://dl.acm.org/doi/10.1145/238386.238530)
 - [Bellotti et al., "Taking Email to Task" / Taskmaster (CHI 2003)](https://www.semanticscholar.org/paper/Taking-email-to-task/8a28a1ee766d87ca9acbd741a7c1972d69217359)
 - [Aberdeen, Pacovsky & Slater, "Gmail Priority Inbox" (NIPS 2010)](https://research.google/pubs/pub36955/)
-- [Cohen, Carvalho & Mitchell, "Email Speech Acts" (EMNLP 2004)](https://www.cs.cmu.edu/~tom/EMNLP2004_final.pdf)
+- [Cohen, Carvalho & Mitchell, "Learning to Classify Email into 'Speech Acts'" (EMNLP 2004)](https://aclanthology.org/W04-3240/)
 - [Vellum, "Levels of Agentic Behavior"](https://www.vellum.ai/blog/levels-of-agentic-behavior)
 - [Knight Institute, "Levels of Autonomy for AI Agents"](https://knightcolumbia.org/content/levels-of-autonomy-for-ai-agents-1)
 

From 3b51ca92c5ff18a962e629e5fb9c94afd258e9de Mon Sep 17 00:00:00 2001
From: Tomasz Iniewicz <itomek@users.noreply.github.com>
Date: Wed, 22 Apr 2026 18:21:52 -0400
Subject: [PATCH 5/7] style(mcp): apply Black formatting to mcp_bridge.py (CI
 lint fix)

---
 src/gaia/mcp/mcp_bridge.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gaia/mcp/mcp_bridge.py b/src/gaia/mcp/mcp_bridge.py
index 91333f83d..9ee1468f9 100644
--- a/src/gaia/mcp/mcp_bridge.py
+++ b/src/gaia/mcp/mcp_bridge.py
@@ -628,7 +628,10 @@ def handle_jsonrpc(self, data):
                 400,
                 {
                     "jsonrpc": "2.0",
-                    "error": {"code": -32600, "message": "Invalid Request: expected JSON object"},
+                    "error": {
+                        "code": -32600,
+                        "message": "Invalid Request: expected JSON object",
+                    },
                     "id": None,
                 },
             )

From 28bf5f7906fff76da6c2274527342156570902f1 Mon Sep 17 00:00:00 2001
From: Tomasz Iniewicz <itomek@users.noreply.github.com>
Date: Thu, 23 Apr 2026 19:07:58 -0400
Subject: [PATCH 6/7] Release v0.17.4

Patch release: custom agents now honor their declared model, and the C++
library no longer crashes on null JSON fields from smaller LLMs.

- Custom Agent UI agents honor kwargs.setdefault("model_id", ...) when the
  session is at the DB default (#841, follow-up #842 restores cache hits).
- C++ library adds null-safety guards in parseLlmResponse() to tolerate
  smaller LLMs that return null for "tool" or "content" (#780).
- Docs: swap broken CMU link for canonical ACL Anthology URL (#817).
---
 docs/docs.json                   |  3 +-
 docs/releases/v0.17.4.mdx        | 61 ++++++++++++++++++++++++++++++++
 src/gaia/apps/webui/package.json |  2 +-
 3 files changed, 64 insertions(+), 2 deletions(-)
 create mode 100644 docs/releases/v0.17.4.mdx

diff --git a/docs/docs.json b/docs/docs.json
index 582f5b27c..f00207b3b 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -403,6 +403,7 @@
             "group": "Release Notes",
             "pages": [
               "releases/index",
+              "releases/v0.17.4",
               "releases/v0.17.3",
               "releases/v0.17.2",
               "releases/v0.17.1",
@@ -450,7 +451,7 @@
   "navbar": {
     "links": [
       {
-        "label": "v0.17.3 \u00b7 Lemonade 10.0.0",
+        "label": "v0.17.4 \u00b7 Lemonade 10.0.0",
         "href": "https://github.com/amd/gaia/releases"
       },
       {
diff --git a/docs/releases/v0.17.4.mdx b/docs/releases/v0.17.4.mdx
new file mode 100644
index 000000000..b63972ca9
--- /dev/null
+++ b/docs/releases/v0.17.4.mdx
@@ -0,0 +1,61 @@
+---
+title: "v0.17.4"
+description: "Patch release: custom agents now honor their declared model, and the C++ library no longer crashes on null JSON fields from smaller LLMs."
+---
+
+# GAIA v0.17.4 Release Notes
+
+GAIA v0.17.4 is a focused patch release that fixes two correctness bugs users hit in practice: custom Agent UI agents silently ignoring their declared model, and the C++ library crashing on tool-call responses from smaller LLMs. Upgrade if you build custom agents or run GAIA against quantized/smaller models.
+
+```bash
+pip install --upgrade amd-gaia
+```
+
+**Why upgrade:**
+- **Custom agents finally use the model they declare** — If your agent sets a model via `kwargs.setdefault("model_id", ...)`, the Agent UI now actually runs against that model when the user's session is at the DB default, instead of silently falling back to the session model. A follow-up fix also restores agent-cache hits on the second turn so custom-model agents aren't rebuilt every request.
+- **No more crashes on smaller LLMs** — The C++ library previously crashed with `json.exception.type_error.302` when models like `qwen3.5:9b` returned `null` for `"tool"` or `"content"` fields. Those fields are now parsed safely.
+
+---
+
+## What's New
+
+### Custom Agent `model_id` Respected in the Agent UI
+
+Previously, `_chat_helpers.py` always passed `model_id=<session model>` explicitly to `registry.create_agent()`, which defeated `kwargs.setdefault("model_id", ...)` in custom agents — `setdefault` only fires when the key is absent (PR [#841](https://github.com/amd/gaia/pull/841)).
+
+**What you can do:**
+- Ship a custom agent whose template sets `kwargs.setdefault("model_id", "my-model")` and have the Agent UI actually use `my-model` when the session is at the DB default.
+- Three-branch precedence for model selection is now explicit: `custom_model` setting > session-explicit model > agent's own `setdefault`.
+
+**Under the hood:**
+- Extracted `_build_create_kwargs()` and `_effective_model()` helpers in `src/gaia/ui/_chat_helpers.py` to eliminate duplicate three-branch logic across streaming and non-streaming paths.
+- `SESSION_DEFAULT_MODEL` now exported from `database.py` as the single source of truth.
+- `_effective_model()` uses an explicit `None` check rather than `or`, so an empty string is not treated as a missing model.
+- Follow-up PR [#842](https://github.com/amd/gaia/pull/842) restored the pre-construction `model_id` as the agent-cache key so lookups and stores match — custom-model agents now hit the cache on turn two instead of being rebuilt every request. Added a two-turn cache-hit regression test plus a static guard that asserts `_store_agent` never receives `_effective_model(...)` as a positional arg.
+
+---
+
+### C++ Library: Null-Safety in LLM Response Parsing
+
+Added `.is_string()` / `.is_null()` guards before all `.get<std::string>()` calls on LLM response JSON fields in `cpp/src/json_utils.cpp::parseLlmResponse()` (PR [#780](https://github.com/amd/gaia/pull/780)). Fixes a crash (`json.exception.type_error.302: type must be string, but is null`) when smaller models (e.g. `qwen3.5:9b`) return `null` for `"tool"` or `"content"` instead of omitting them. `json.contains()` returns `true` for `null` values, so the existing checks were insufficient.
+
+---
+
+## Full Changelog
+
+- `8fc43f3f` — fix(cpp): add null-safety checks for JSON string fields in LLM response parsing (#780)
+- `62722de2` — fix(ui): honor custom agent model_id when session is at DB default (#841)
+- `4acfd400` — fix(ui): extract _build_create_kwargs/_effective_model, import SESSION_DEFAULT_MODEL
+- `8f5c7621` — fix(ui): restore intent-key for agent cache store to fix miss regression (#842)
+- `a0fdb109` — docs(plans): fix broken CMU link to EMNLP 2004 Email Speech Acts paper (#817)
+
+Full Changelog: [v0.17.3...v0.17.4](https://github.com/amd/gaia/compare/v0.17.3...v0.17.4)
+
+---
+
+## Release checklist
+- [x] `util/validate_release_notes.py docs/releases/v0.17.4.mdx --tag v0.17.4` passes
+- [x] `src/gaia/version.py` → `0.17.4`
+- [x] `src/gaia/apps/webui/package.json` → `0.17.4`
+- [x] Navbar label in `docs/docs.json` → `v0.17.4 · Lemonade 10.0.0`
+- [ ] Review from @kovtcharov-amd addressed
diff --git a/src/gaia/apps/webui/package.json b/src/gaia/apps/webui/package.json
index 14a942e9b..9c4e827d0 100644
--- a/src/gaia/apps/webui/package.json
+++ b/src/gaia/apps/webui/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@amd-gaia/agent-ui",
-  "version": "0.17.3",
+  "version": "0.17.4",
   "type": "module",
   "productName": "GAIA Agent UI",
   "description": "Privacy-first agentic AI interface with document Q&A - runs 100% locally on AMD Ryzen AI",

From 1e19ce16344813cb587dc16c5fc1ad1ee9ea9f99 Mon Sep 17 00:00:00 2001
From: Tomasz Iniewicz <itomek@users.noreply.github.com>
Date: Thu, 23 Apr 2026 19:10:47 -0400
Subject: [PATCH 7/7] docs(release): tone down v0.17.4 notes, drop install
 block, match v0.17.1 style

---
 docs/releases/v0.17.4.mdx | 43 +++++++++++++++------------------------
 1 file changed, 16 insertions(+), 27 deletions(-)

diff --git a/docs/releases/v0.17.4.mdx b/docs/releases/v0.17.4.mdx
index b63972ca9..770a16c9b 100644
--- a/docs/releases/v0.17.4.mdx
+++ b/docs/releases/v0.17.4.mdx
@@ -1,19 +1,15 @@
 ---
 title: "v0.17.4"
-description: "Patch release: custom agents now honor their declared model, and the C++ library no longer crashes on null JSON fields from smaller LLMs."
+description: "Custom-agent model selection, C++ null-safety, and docs link fix"
 ---
 
 # GAIA v0.17.4 Release Notes
 
-GAIA v0.17.4 is a focused patch release that fixes two correctness bugs users hit in practice: custom Agent UI agents silently ignoring their declared model, and the C++ library crashing on tool-call responses from smaller LLMs. Upgrade if you build custom agents or run GAIA against quantized/smaller models.
-
-```bash
-pip install --upgrade amd-gaia
-```
+GAIA v0.17.4 is a patch release covering two correctness fixes in the Agent UI custom-agent path, a null-safety fix in the C++ library for smaller LLMs, and a fix for a broken docs citation.
 
 **Why upgrade:**
-- **Custom agents finally use the model they declare** — If your agent sets a model via `kwargs.setdefault("model_id", ...)`, the Agent UI now actually runs against that model when the user's session is at the DB default, instead of silently falling back to the session model. A follow-up fix also restores agent-cache hits on the second turn so custom-model agents aren't rebuilt every request.
-- **No more crashes on smaller LLMs** — The C++ library previously crashed with `json.exception.type_error.302` when models like `qwen3.5:9b` returned `null` for `"tool"` or `"content"` fields. Those fields are now parsed safely.
+- **Custom agents use their declared model** — If a custom agent sets a model via `kwargs.setdefault("model_id", ...)`, the Agent UI now respects that setting when the session is at the DB default, instead of falling back to the session model.
+- **Compatibility with smaller LLMs in the C++ library** — The C++ JSON parser now tolerates `null` values in `"tool"` and `"answer"` fields, which some smaller models emit in place of omitting the field.
 
 ---
 
@@ -21,28 +17,30 @@ pip install --upgrade amd-gaia
 
 ### Custom Agent `model_id` Respected in the Agent UI
 
-Previously, `_chat_helpers.py` always passed `model_id=<session model>` explicitly to `registry.create_agent()`, which defeated `kwargs.setdefault("model_id", ...)` in custom agents — `setdefault` only fires when the key is absent (PR [#841](https://github.com/amd/gaia/pull/841)).
+`_chat_helpers.py` previously passed `model_id=<session model>` explicitly to `registry.create_agent()`, which defeated `kwargs.setdefault("model_id", ...)` in custom agents — `setdefault` only fires when the key is absent (PR [#841](https://github.com/amd/gaia/pull/841)). The Agent UI now builds `create_kwargs` conditionally, omitting `model_id` when the session is at the DB default so the agent's `__init__` setdefault governs. Three-branch precedence is now explicit: `custom_model` setting > session-explicit model > agent's own `setdefault`.
 
-**What you can do:**
-- Ship a custom agent whose template sets `kwargs.setdefault("model_id", "my-model")` and have the Agent UI actually use `my-model` when the session is at the DB default.
-- Three-branch precedence for model selection is now explicit: `custom_model` setting > session-explicit model > agent's own `setdefault`.
+A follow-up fix (PR [#842](https://github.com/amd/gaia/pull/842)) restored the pre-construction `model_id` as the agent-cache key. The initial PR #841 landing had switched `_store_agent` to use the post-construction `_effective_model(agent, model_id)` while `_get_cached_agent` still looked up with `model_id`, so keys never matched for custom-model agents and the agent was rebuilt on every turn. A two-turn cache-hit regression test and a static guard on `_store_agent` call sites were added alongside the fix.
 
-**Under the hood:**
-- Extracted `_build_create_kwargs()` and `_effective_model()` helpers in `src/gaia/ui/_chat_helpers.py` to eliminate duplicate three-branch logic across streaming and non-streaming paths.
-- `SESSION_DEFAULT_MODEL` now exported from `database.py` as the single source of truth.
-- `_effective_model()` uses an explicit `None` check rather than `or`, so an empty string is not treated as a missing model.
-- Follow-up PR [#842](https://github.com/amd/gaia/pull/842) restored the pre-construction `model_id` as the agent-cache key so lookups and stores match — custom-model agents now hit the cache on turn two instead of being rebuilt every request. Added a two-turn cache-hit regression test plus a static guard that asserts `_store_agent` never receives `_effective_model(...)` as a positional arg.
+Supporting refactor: extracted `_build_create_kwargs()` and `_effective_model()` helpers in `src/gaia/ui/_chat_helpers.py` to deduplicate the three-branch logic across streaming and non-streaming paths, and exported `SESSION_DEFAULT_MODEL` from `database.py` as the single source of truth.
 
 ---
 
 ### C++ Library: Null-Safety in LLM Response Parsing
 
-Added `.is_string()` / `.is_null()` guards before all `.get<std::string>()` calls on LLM response JSON fields in `cpp/src/json_utils.cpp::parseLlmResponse()` (PR [#780](https://github.com/amd/gaia/pull/780)). Fixes a crash (`json.exception.type_error.302: type must be string, but is null`) when smaller models (e.g. `qwen3.5:9b`) return `null` for `"tool"` or `"content"` instead of omitting them. `json.contains()` returns `true` for `null` values, so the existing checks were insufficient.
+`parseLlmResponse()` in `cpp/src/json_utils.cpp` now guards `.get<std::string>()` calls on the `"tool"` and `"answer"` JSON fields with `.is_string()` / `.is_null()` checks (PR [#780](https://github.com/amd/gaia/pull/780)). This fixes a crash (`json.exception.type_error.302: type must be string, but is null`) when smaller LLMs (for example `qwen3.5:9b`) return `null` for those fields instead of omitting them. `json.contains()` returns `true` for `null` values, so the existing presence checks were insufficient.
+
+---
+
+## Bug Fixes
+
+- **Email-triage agent plan: broken CMU citation link** (PR [#817](https://github.com/amd/gaia/pull/817)) — Swapped the failing `www.cs.cmu.edu/~tom/EMNLP2004_final.pdf` URL in `docs/plans/email-triage-agent.mdx` for the canonical ACL Anthology record at [W04-3240](https://aclanthology.org/W04-3240/). The CMU URL was failing DNS resolution in CI, breaking the `Verify external URLs` check on every open docs PR. Restored the paper's full title ("Learning to Classify Email into 'Speech Acts'") for consistency with other citations in the same references list.
 
 ---
 
 ## Full Changelog
 
+**5 commits** since v0.17.3:
+
 - `8fc43f3f` — fix(cpp): add null-safety checks for JSON string fields in LLM response parsing (#780)
 - `62722de2` — fix(ui): honor custom agent model_id when session is at DB default (#841)
 - `4acfd400` — fix(ui): extract _build_create_kwargs/_effective_model, import SESSION_DEFAULT_MODEL
@@ -50,12 +48,3 @@ Added `.is_string()` / `.is_null()` guards before all `.get<std::string>()` call
 - `a0fdb109` — docs(plans): fix broken CMU link to EMNLP 2004 Email Speech Acts paper (#817)
 
 Full Changelog: [v0.17.3...v0.17.4](https://github.com/amd/gaia/compare/v0.17.3...v0.17.4)
-
----
-
-## Release checklist
-- [x] `util/validate_release_notes.py docs/releases/v0.17.4.mdx --tag v0.17.4` passes
-- [x] `src/gaia/version.py` → `0.17.4`
-- [x] `src/gaia/apps/webui/package.json` → `0.17.4`
-- [x] Navbar label in `docs/docs.json` → `v0.17.4 · Lemonade 10.0.0`
-- [ ] Review from @kovtcharov-amd addressed