From 62722de2850443641decadfa333a89dd98c529b9 Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Mon, 20 Apr 2026 18:50:45 -0400 Subject: [PATCH 1/7] fix(ui): honor custom agent model_id when session is at DB default (#841) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, _chat_helpers.py always passed model_id= explicitly to registry.create_agent(), defeating kwargs.setdefault("model_id", ...) in custom agents — which only fires when the key is absent. Fix: build create_kwargs conditionally, omitting model_id when the session is at the DB default so the agent's __init__ setdefault governs. Also use agent.model_id (post-construction) for both _store_agent cache key and the pre-flight _maybe_load_expected_model call. Three-branch precedence: custom_model setting > session-explicit > omit kwarg. Closes #841 --- src/gaia/ui/_chat_helpers.py | 97 ++++-- tests/integration/test_chat_ui_integration.py | 91 ++++++ .../unit/chat/ui/test_agent_model_override.py | 58 ++++ .../ui/test_chat_helpers_model_resolution.py | 288 ++++++++++++++++++ 4 files changed, 511 insertions(+), 23 deletions(-) create mode 100644 tests/unit/chat/ui/test_agent_model_override.py create mode 100644 tests/unit/chat/ui/test_chat_helpers_model_resolution.py diff --git a/src/gaia/ui/_chat_helpers.py b/src/gaia/ui/_chat_helpers.py index cb23682c5..ad476e581 100644 --- a/src/gaia/ui/_chat_helpers.py +++ b/src/gaia/ui/_chat_helpers.py @@ -73,6 +73,11 @@ def get_agent_registry(): _agent_cache_lock = threading.Lock() _MAX_CACHED_AGENTS = 10 +# Matches the fallback default in gaia.ui.database.create_session (~line 233). +# Kept local to avoid widening _chat_helpers.py's coupling to database.py for +# a cosmetic rename. If that value changes, update here too. +_DB_DEFAULT_MODEL = "Qwen3.5-35B-A3B-GGUF" + # Last known MCP runtime status — updated after each agent setup so # GET /api/mcp/status can return it without needing a running chat. _mcp_status_cache: list[dict] = [] @@ -554,19 +559,38 @@ def _do_chat(): agent_type, session_id[:8], ) - agent = registry.create_agent( - agent_type, - model_id=model_id, - silent_mode=True, - debug=False, - ) + create_kwargs: dict = {"silent_mode": True, "debug": False} + if custom_model: + create_kwargs["model_id"] = custom_model + logger.info( + "create_agent: custom_model override -> %r", custom_model + ) + elif model_id and model_id != _DB_DEFAULT_MODEL: + create_kwargs["model_id"] = model_id + logger.info("create_agent: session-explicit model -> %r", model_id) + else: + # Omit model_id so kwargs.setdefault in the agent's __init__ fires. + # setdefault only works when the key is ABSENT — passing None or the + # DB default explicitly defeats it. This is the fix for issue #841. + logger.info( + "create_agent: omitting model_id kwarg (session at DB default %r); " + "agent's kwargs.setdefault or AgentConfig fallback will govern", + _DB_DEFAULT_MODEL, + ) + agent = registry.create_agent(agent_type, **create_kwargs) logger.info( "chat: Invoking agent %s for session %s, model=%s", agent_type, session_id[:8], - model_id, + getattr(agent, "model_id", model_id), + ) + _store_agent( + session_id, + getattr(agent, "model_id", None) or model_id, + document_ids, + agent, + agent_type, ) - _store_agent(session_id, model_id, document_ids, agent, agent_type) # Restore conversation history (limited to prevent context overflow). # Always re-inject from DB so the history is consistent with what was @@ -585,8 +609,11 @@ def _do_chat(): agent.conversation_history.append({"role": "user", "content": u}) agent.conversation_history.append({"role": "assistant", "content": a}) - # Pre-flight: same fix as the streaming path — see _maybe_load_expected_model. - _maybe_load_expected_model(model_id) + # Pre-flight on agent's ACTUAL effective model. When model_id kwarg was + # omitted above, the agent's __init__ set model_id via kwargs.setdefault — + # a value invisible to us pre-construction. Using agent.model_id preserves + # the existing 100-900s silent-hang protection for all code paths. + _maybe_load_expected_model(getattr(agent, "model_id", None) or model_id) result = agent.process_query(request.message) if isinstance(result, dict): @@ -913,19 +940,36 @@ def _run_agent(): session_id[:8], ) t_construct = _time.monotonic() - agent = registry.create_agent( - agent_type, - model_id=model_id, - streaming=True, - silent_mode=False, - debug=False, - ) + create_kwargs = { + "streaming": True, + "silent_mode": False, + "debug": False, + } + if custom_model: + create_kwargs["model_id"] = custom_model + logger.info( + "create_agent: custom_model override -> %r (streaming)", + custom_model, + ) + elif model_id and model_id != _DB_DEFAULT_MODEL: + create_kwargs["model_id"] = model_id + logger.info( + "create_agent: session-explicit model -> %r (streaming)", + model_id, + ) + else: + logger.info( + "create_agent: omitting model_id kwarg (session at DB default %r); " + "agent's kwargs.setdefault or AgentConfig fallback will govern (streaming)", + _DB_DEFAULT_MODEL, + ) + agent = registry.create_agent(agent_type, **create_kwargs) agent.console = sse_handler logger.info( "chat: Invoking agent %s for session %s, model=%s took=%.3fs", agent_type, session_id[:8], - model_id, + getattr(agent, "model_id", model_id), _time.monotonic() - t_construct, ) @@ -937,7 +981,11 @@ def _run_agent(): _index_rag_with_progress(agent, rag_file_paths, sse_handler) _store_agent( - session_id, model_id, document_ids, agent, agent_type + session_id, + getattr(agent, "model_id", None) or model_id, + document_ids, + agent, + agent_type, ) sse_handler._emit( @@ -987,10 +1035,13 @@ def _run_agent(): if sse_handler.cancelled.is_set(): return - # Pre-flight: ensure a chat-capable LLM is active before sending the query. - # Lemonade silently hangs when no model is loaded or the embedding model is - # active — no error is returned, so _execute_with_auto_download never fires. - _maybe_load_expected_model(model_id, sse_handler) + # Pre-flight on agent's ACTUAL effective model. When model_id kwarg was + # omitted, the agent's __init__ set model_id via kwargs.setdefault — a value + # invisible pre-construction. Using agent.model_id preserves the existing + # 100-900s silent-hang protection for all code paths including setdefault. + _maybe_load_expected_model( + getattr(agent, "model_id", None) or model_id, sse_handler + ) # -- Phase 5: Query processing -- t_query = _time.monotonic() diff --git a/tests/integration/test_chat_ui_integration.py b/tests/integration/test_chat_ui_integration.py index ee6db321a..b092a9396 100644 --- a/tests/integration/test_chat_ui_integration.py +++ b/tests/integration/test_chat_ui_integration.py @@ -1591,3 +1591,94 @@ def test_delete_messages_from_session_not_found(self, client): """DELETE .../and-below returns 404 for non-existent session.""" resp = client.delete("/api/sessions/nonexistent/messages/1/and-below") assert resp.status_code == 404 + + +# ── Issue #841 regression: custom agent model_id honored through API ────────── + + +class TestCustomAgentModelChoice: + """Verify that a custom Python agent's kwargs.setdefault model_id reaches the + registry.create_agent call without model_id being passed as an explicit kwarg. + + This is the integration-layer pin for issue #841. It exercises the full + path: HTTP POST → session → _get_chat_response → registry.create_agent. + """ + + def test_custom_agent_model_id_honored_through_api(self, tmp_path): + import textwrap + + agents_dir = tmp_path / ".gaia" / "agents" / "smallbot" + agents_dir.mkdir(parents=True) + (agents_dir / "agent.py").write_text(textwrap.dedent(""" + from gaia.agents.base.agent import Agent + + class SmallBot(Agent): + AGENT_ID = "smallbot" + AGENT_NAME = "SmallBot" + + def __init__(self, **kwargs): + kwargs.setdefault("model_id", "Qwen3.5-4B-GGUF") + super().__init__(skip_lemonade=True, **kwargs) + + def _get_system_prompt(self): + return "x" + + def _register_tools(self): + pass + """)) + + # HOME patch must wrap the full lifespan: discover() fires on __enter__. + with patch("gaia.agents.registry.Path.home", return_value=tmp_path): + app = create_app(db_path=":memory:") + + with TestClient(app) as client: + # Spy on create_agent AFTER lifespan fires (registry exists now). + captured = {} + original_create = app.state.agent_registry.create_agent + + def _spy(agent_id, **kwargs): + if agent_id == "smallbot": + captured["model_id_kwarg"] = kwargs.get("model_id", "") + agent = original_create(agent_id, **kwargs) + if agent_id == "smallbot": + captured["agent_model_id"] = getattr(agent, "model_id", None) + return agent + + app.state.agent_registry.create_agent = _spy + + # Create a session typed to our custom agent. + sess_resp = client.post( + "/api/sessions", + json={"title": "841-test", "agent_type": "smallbot"}, + ) + assert sess_resp.status_code == 200, sess_resp.text + sid = sess_resp.json()["id"] + + # Send a chat message, bypassing Lemonade and LLM. + with ( + patch("gaia.ui._chat_helpers._maybe_load_expected_model"), + patch( + "gaia.ui._chat_helpers._agent_registry", + app.state.agent_registry, + ), + ): + chat_resp = client.post( + "/api/chat/send", + json={ + "session_id": sid, + "message": "hi", + "stream": False, + }, + ) + + assert chat_resp.status_code == 200, chat_resp.text + + assert captured, "create_agent spy was never called for smallbot" + assert captured["model_id_kwarg"] == "", ( + f"Issue #841: model_id kwarg must be omitted when session is at DB default; " + f"got model_id_kwarg={captured['model_id_kwarg']!r}" + ) + assert captured["agent_model_id"] == "Qwen3.5-4B-GGUF", ( + f"Issue #841: agent.model_id must reflect kwargs.setdefault value; " + f"got {captured['agent_model_id']!r}" + ) diff --git a/tests/unit/chat/ui/test_agent_model_override.py b/tests/unit/chat/ui/test_agent_model_override.py new file mode 100644 index 000000000..19d116eb5 --- /dev/null +++ b/tests/unit/chat/ui/test_agent_model_override.py @@ -0,0 +1,58 @@ +# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +"""Regression test for issue #841: custom agent's model_id ignored by UI.""" + +import textwrap +from unittest.mock import patch + +from gaia.agents.registry import AgentRegistry + + +def test_issue_841_custom_python_agent_model_id_respected(tmp_path): + """A custom Python agent using kwargs.setdefault in __init__ must be + instantiated with its own model_id when the UI omits the kwarg. + + On pre-fix main, the UI always passes model_id= explicitly + to registry.create_agent — defeating kwargs.setdefault, which only fires + when the key is ABSENT. After T3 lands, the UI omits model_id when no + explicit user choice exists, so setdefault fires as the agent intends. + + This test simulates the fixed UI call pattern: calling create_agent without + model_id, and asserting the agent's declared default is respected. + """ + agents_dir = tmp_path / ".gaia" / "agents" / "foo" + agents_dir.mkdir(parents=True) + (agents_dir / "agent.py").write_text(textwrap.dedent(""" + from gaia.agents.base.agent import Agent + + class FooAgent(Agent): + AGENT_ID = "foo" + AGENT_NAME = "Foo" + + def __init__(self, **kwargs): + kwargs.setdefault("model_id", "Qwen3.5-4B-GGUF") + super().__init__(skip_lemonade=True, **kwargs) + + def _get_system_prompt(self): + return "foo" + + def _register_tools(self): + pass + """)) + + with patch("gaia.agents.registry.Path.home", return_value=tmp_path): + registry = AgentRegistry() + registry.discover() + + reg = registry.get("foo") + assert reg is not None, "custom agent should be discovered under patched HOME" + + # Simulate what the fixed UI does when no explicit user choice exists: + # OMIT the model_id kwarg entirely so setdefault fires. + agent = registry.create_agent("foo", silent_mode=True, debug=False) + + assert agent.model_id == "Qwen3.5-4B-GGUF", ( + f"Issue #841: custom agent's kwargs.setdefault('model_id', ...) must " + f"govern when UI omits the kwarg; got {agent.model_id!r}" + ) diff --git a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py new file mode 100644 index 000000000..fb402e409 --- /dev/null +++ b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py @@ -0,0 +1,288 @@ +# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +"""Unit tests for the model_id kwarg selection logic in _chat_helpers.py. + +Covers the three-branch precedence chain introduced by the #841 fix: + 1. custom_model setting wins over everything + 2. Session-explicit model (anything != DB default) is honored + 3. model_id kwarg OMITTED when session is at the DB default, so that the + custom agent's kwargs.setdefault("model_id", ...) fires (the #841 fix) + +Also pins: streaming vs non-streaming silent_mode values, static source-grep +guard against reintroduction of the antipattern, post-construction pre-flight +contract, and built-in ChatAgent (agent_type="chat") behavior unchanged. +""" + +import asyncio +from pathlib import Path +from unittest.mock import MagicMock, patch + +# DB default must match the value used by gaia.ui.database.create_session +_DB_DEFAULT = "Qwen3.5-35B-A3B-GGUF" + + +# ── Helpers ────────────────────────────────────────────────────────────────── + + +def _run_sync(coro): + return asyncio.get_event_loop().run_until_complete(coro) + + +def _make_session(model=_DB_DEFAULT, agent_type="bot"): + return { + "document_ids": [], + "model": model, + "agent_type": agent_type, + "session_id": "sess-1", + } + + +def _make_db(custom_model=None): + db = MagicMock() + db.get_messages.return_value = [] + db.get_setting.return_value = custom_model + db.list_documents.return_value = [] + db.update_session.return_value = None + db.get_session.return_value = {} + return db + + +def _make_registry(resolve_model_return=None, setdefault_model="SetdefaultChose-GGUF"): + """Return (registry_mock, captured_dict). + + captured["kwargs"] holds the kwargs received by create_agent. + The fake agent's model_id mimics kwargs.setdefault: if model_id was NOT + passed, it is set to setdefault_model; otherwise it keeps the passed value. + """ + registry = MagicMock() + registry.get.return_value = True # agent_type is registered + registry.resolve_model.return_value = resolve_model_return + + captured = {} + + def _spy(agent_id, **kwargs): + captured["kwargs"] = dict(kwargs) + fake = MagicMock() + fake.model_id = kwargs.get("model_id", setdefault_model) + fake.process_query.return_value = "ok" + fake.conversation_history = [] + fake.indexed_files = set() + return fake + + registry.create_agent.side_effect = _spy + return registry, captured + + +def _call_non_streaming(session, db, agent_type_override=None, session_id="sess-1"): + import gaia.ui._chat_helpers as _helpers + from gaia.ui._chat_helpers import _get_chat_response + from gaia.ui.models import ChatRequest + + # Clear the agent cache so tests don't interfere with each other. + with _helpers._agent_cache_lock: + _helpers._agent_cache.clear() + + request = ChatRequest( + session_id=session_id, + message="hi", + stream=False, + agent_type=agent_type_override, + ) + session = dict(session) + session.setdefault("session_id", session_id) + return _run_sync(_get_chat_response(db, session, request)) + + +# ── Tests ───────────────────────────────────────────────────────────────────── + + +class TestModelKwargSelection: + """Verify the three-branch model_id selection at both call sites.""" + + def test_custom_model_setting_wins_over_everything(self): + """db.get_setting('custom_model') result always reaches create_agent as model_id.""" + registry, captured = _make_registry(setdefault_model="AgentPref-GGUF") + db = _make_db(custom_model="UserPicked-GGUF") + session = _make_session(model=_DB_DEFAULT) + + with ( + patch("gaia.ui._chat_helpers._agent_registry", registry), + patch("gaia.ui._chat_helpers._maybe_load_expected_model"), + ): + _call_non_streaming(session, db) + + assert captured["kwargs"].get("model_id") == "UserPicked-GGUF" + + def test_session_explicit_model_honored(self): + """A session model that differs from the DB default is forwarded as model_id.""" + registry, captured = _make_registry() + db = _make_db(custom_model=None) + session = _make_session(model="UserChose-GGUF") + + with ( + patch("gaia.ui._chat_helpers._agent_registry", registry), + patch("gaia.ui._chat_helpers._maybe_load_expected_model"), + ): + _call_non_streaming(session, db) + + assert captured["kwargs"].get("model_id") == "UserChose-GGUF" + + def test_model_id_kwarg_omitted_when_session_at_db_default(self): + """Core #841 fix: model_id kwarg must be ABSENT when session == DB default. + + kwargs.setdefault only fires when the key is absent. The pre-fix code + always passes model_id= explicitly, defeating setdefault. + After the fix, model_id is omitted so the agent's __init__ governs. + """ + registry, captured = _make_registry(setdefault_model="SetdefaultChose-GGUF") + db = _make_db(custom_model=None) + session = _make_session(model=_DB_DEFAULT) + + with ( + patch("gaia.ui._chat_helpers._agent_registry", registry), + patch("gaia.ui._chat_helpers._maybe_load_expected_model"), + ): + _call_non_streaming(session, db) + + assert "model_id" not in captured.get("kwargs", {}), ( + "Issue #841: model_id kwarg must be omitted when session is at DB default; " + f"got kwargs={captured.get('kwargs')}" + ) + # The spy's setdefault model should be what the agent ends up with. + assert ( + captured.get("kwargs", {}).get("model_id", "SetdefaultChose-GGUF") + == "SetdefaultChose-GGUF" + ) + + def test_model_id_kwarg_omitted_when_session_model_is_none(self): + """model_id kwarg is omitted when session model is None (unset session).""" + registry, captured = _make_registry(setdefault_model="SetdefaultChose-GGUF") + db = _make_db(custom_model=None) + session = _make_session(model=None) + + with ( + patch("gaia.ui._chat_helpers._agent_registry", registry), + patch("gaia.ui._chat_helpers._maybe_load_expected_model"), + ): + _call_non_streaming(session, db) + + assert "model_id" not in captured.get("kwargs", {}), ( + f"model_id kwarg must be omitted when session model is None; " + f"got kwargs={captured.get('kwargs')}" + ) + + def test_non_streaming_path_silent_mode_true_preserved(self): + """Non-streaming create_agent call must pass silent_mode=True.""" + registry, captured = _make_registry() + db = _make_db(custom_model=None) + session = _make_session(model=_DB_DEFAULT) + + with ( + patch("gaia.ui._chat_helpers._agent_registry", registry), + patch("gaia.ui._chat_helpers._maybe_load_expected_model"), + ): + _call_non_streaming(session, db) + + assert captured.get("kwargs", {}).get("silent_mode") is True, ( + "Non-streaming path must pass silent_mode=True to create_agent; " + f"got kwargs={captured.get('kwargs')}" + ) + assert "streaming" not in captured.get( + "kwargs", {} + ), "Non-streaming path must not pass streaming=True to create_agent" + + +class TestStaticRegressionGuard: + """Source-level pin against reintroduction of the antipattern.""" + + def test_no_direct_model_id_kwarg_in_create_agent_calls(self): + """registry.create_agent must never be called with model_id=model_id directly. + + The pre-fix antipattern was: + registry.create_agent(agent_type, model_id=model_id, ...) + which always passes the kwarg explicitly, defeating kwargs.setdefault. + + ChatAgentConfig(model_id=model_id, ...) is legitimate and intentionally + excluded from this check — only create_agent calls are guarded. + + This test catches future regressions at the source level in <5ms. + """ + import re + + src = (Path(__file__).parents[4] / "src/gaia/ui/_chat_helpers.py").read_text() + # Matches the old antipattern: create_agent(... model_id=model_id ...) + # Uses DOTALL so it catches multiline calls. + match = re.search(r"create_agent\([^)]*model_id=model_id", src, re.DOTALL) + assert not match, ( + "Issue #841 regression: registry.create_agent must not receive " + "model_id=model_id as a direct kwarg. Build create_kwargs conditionally " + "and omit model_id when no explicit user choice exists.\n" + f"Match found at: {match.group()[:80]!r}" + ) + + +class TestPostConstructionPreflight: + """Verify pre-flight uses agent.model_id (not pre-call model_id variable).""" + + def test_preflight_receives_agent_effective_model(self): + """_maybe_load_expected_model must be called with the agent's actual model_id. + + When model_id kwarg is omitted, the agent's __init__ sets model_id via + setdefault AFTER construction. The pre-fix code called + _maybe_load_expected_model(model_id) with the pre-call variable (DB + default), missing the agent's actual effective model. The fix calls it + with agent.model_id so Lemonade pre-flight fires for the right model. + """ + registry, captured = _make_registry(setdefault_model="SetdefaultChose-GGUF") + db = _make_db(custom_model=None) + session = _make_session(model=_DB_DEFAULT) + + preflight_calls = [] + + def _spy_preflight(model_id, *args, **kwargs): + preflight_calls.append(model_id) + + with ( + patch("gaia.ui._chat_helpers._agent_registry", registry), + patch( + "gaia.ui._chat_helpers._maybe_load_expected_model", + side_effect=_spy_preflight, + ), + ): + _call_non_streaming(session, db) + + assert preflight_calls, "_maybe_load_expected_model was never called" + # After the fix, pre-flight must use the agent's actual model_id + # ("SetdefaultChose-GGUF"), not the DB default it was seeded with. + assert preflight_calls[-1] == "SetdefaultChose-GGUF", ( + f"Pre-flight must use agent.model_id after construction; " + f"got {preflight_calls[-1]!r} (expected 'SetdefaultChose-GGUF')" + ) + + +class TestBuiltinChatAgentUnchanged: + """Pin AC4: built-in ChatAgent (agent_type='chat') behavior is unchanged.""" + + def test_chat_agent_type_bypasses_registry(self): + """agent_type='chat' must not go through registry.create_agent.""" + registry, captured = _make_registry() + db = _make_db(custom_model=None) + session = _make_session(model=_DB_DEFAULT, agent_type="chat") + + fake_agent = MagicMock() + fake_agent.process_query.return_value = "ok" + fake_agent.conversation_history = [] + fake_agent.indexed_files = set() + fake_agent.rag = None + + with ( + patch("gaia.ui._chat_helpers._agent_registry", registry), + patch("gaia.ui._chat_helpers._maybe_load_expected_model"), + patch("gaia.agents.chat.agent.ChatAgent", return_value=fake_agent), + patch("gaia.agents.chat.agent.ChatAgentConfig"), + ): + _call_non_streaming(session, db, agent_type_override=None) + + # registry.create_agent must NOT have been called for the chat path + registry.create_agent.assert_not_called() From 4acfd400bba16654eb6f24865a8ade31173e51d2 Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Mon, 20 Apr 2026 19:10:16 -0400 Subject: [PATCH 2/7] fix(ui): extract _build_create_kwargs/_effective_model, import SESSION_DEFAULT_MODEL Addresses code review feedback on PR #842: - Export SESSION_DEFAULT_MODEL from database.py (single source of truth) instead of duplicating the string literal in _chat_helpers.py - Extract _build_create_kwargs() helper to eliminate the duplicate three-branch create_kwargs logic across non-streaming and streaming code paths - Extract _effective_model() helper using explicit None check (not `or`) to safely read agent.model_id post-construction without treating empty string as missing - Fix static regression guard regex to use [^()]* so nested helper calls inside create_agent() are not falsely flagged - Update unit test to import SESSION_DEFAULT_MODEL instead of hardcoding --- src/gaia/ui/_chat_helpers.py | 131 ++++++++++-------- src/gaia/ui/database.py | 6 +- .../ui/test_chat_helpers_model_resolution.py | 11 +- 3 files changed, 85 insertions(+), 63 deletions(-) diff --git a/src/gaia/ui/_chat_helpers.py b/src/gaia/ui/_chat_helpers.py index ad476e581..2aeae8d6f 100644 --- a/src/gaia/ui/_chat_helpers.py +++ b/src/gaia/ui/_chat_helpers.py @@ -23,7 +23,7 @@ import time as _time from pathlib import Path -from .database import ChatDatabase +from .database import SESSION_DEFAULT_MODEL, ChatDatabase from .models import ChatRequest from .sse_handler import ( _ANSWER_JSON_SUB_RE, @@ -73,10 +73,8 @@ def get_agent_registry(): _agent_cache_lock = threading.Lock() _MAX_CACHED_AGENTS = 10 -# Matches the fallback default in gaia.ui.database.create_session (~line 233). -# Kept local to avoid widening _chat_helpers.py's coupling to database.py for -# a cosmetic rename. If that value changes, update here too. -_DB_DEFAULT_MODEL = "Qwen3.5-35B-A3B-GGUF" +# Alias so call-sites read naturally; the canonical value lives in database.py. +_DB_DEFAULT_MODEL = SESSION_DEFAULT_MODEL # Last known MCP runtime status — updated after each agent setup so # GET /api/mcp/status can return it without needing a running chat. @@ -89,6 +87,56 @@ def get_agent_registry(): model_load_lock = threading.Lock() +def _build_create_kwargs( + *, + custom_model: str | None, + model_id: str | None, + streaming: bool = False, +) -> dict: + """Return the kwargs dict for registry.create_agent(). + + Precedence (high → low): + 1. custom_model setting (explicit user override from db) + 2. session-explicit model (differs from SESSION_DEFAULT_MODEL) + 3. omit model_id — lets the agent's kwargs.setdefault govern (fix #841) + + Note: if registry.resolve_model() already promoted model_id before this + call, it is forwarded as-is via branch 2 (resolve_model result ≠ default). + """ + suffix = " (streaming)" if streaming else "" + kwargs: dict = {"silent_mode": not streaming, "debug": False} + if streaming: + kwargs["streaming"] = True + + if custom_model: + kwargs["model_id"] = custom_model + logger.info("create_agent: custom_model override -> %s%s", custom_model, suffix) + elif model_id and model_id != _DB_DEFAULT_MODEL: + kwargs["model_id"] = model_id + logger.info("create_agent: session-explicit model -> %s%s", model_id, suffix) + else: + # Omit model_id so kwargs.setdefault in the agent's __init__ fires. + # setdefault only works when the key is ABSENT. Passing the DB default + # (or None / empty) explicitly defeats it — this is the fix for #841. + logger.info( + "create_agent: omitting model_id kwarg (session at DB default %s); " + "agent's kwargs.setdefault or AgentConfig fallback will govern%s", + _DB_DEFAULT_MODEL, + suffix, + ) + return kwargs + + +def _effective_model(agent, fallback: str | None) -> str | None: + """Return agent.model_id if set, else fallback. + + Uses explicit None check (not `or`) to avoid treating empty-string + model_id as missing — which would silently load the wrong model. + """ + effective = getattr(agent, "model_id", None) + return effective if effective is not None else fallback + + def get_cached_mcp_status() -> list[dict]: """Return the last known MCP server connection status from any cached agent.""" with _mcp_status_lock: @@ -559,34 +607,21 @@ def _do_chat(): agent_type, session_id[:8], ) - create_kwargs: dict = {"silent_mode": True, "debug": False} - if custom_model: - create_kwargs["model_id"] = custom_model - logger.info( - "create_agent: custom_model override -> %r", custom_model - ) - elif model_id and model_id != _DB_DEFAULT_MODEL: - create_kwargs["model_id"] = model_id - logger.info("create_agent: session-explicit model -> %r", model_id) - else: - # Omit model_id so kwargs.setdefault in the agent's __init__ fires. - # setdefault only works when the key is ABSENT — passing None or the - # DB default explicitly defeats it. This is the fix for issue #841. - logger.info( - "create_agent: omitting model_id kwarg (session at DB default %r); " - "agent's kwargs.setdefault or AgentConfig fallback will govern", - _DB_DEFAULT_MODEL, - ) - agent = registry.create_agent(agent_type, **create_kwargs) + agent = registry.create_agent( + agent_type, + **_build_create_kwargs( + custom_model=custom_model, model_id=model_id + ), + ) logger.info( "chat: Invoking agent %s for session %s, model=%s", agent_type, session_id[:8], - getattr(agent, "model_id", model_id), + _effective_model(agent, model_id), ) _store_agent( session_id, - getattr(agent, "model_id", None) or model_id, + _effective_model(agent, model_id), document_ids, agent, agent_type, @@ -610,10 +645,10 @@ def _do_chat(): agent.conversation_history.append({"role": "assistant", "content": a}) # Pre-flight on agent's ACTUAL effective model. When model_id kwarg was - # omitted above, the agent's __init__ set model_id via kwargs.setdefault — - # a value invisible to us pre-construction. Using agent.model_id preserves + # omitted, the agent's __init__ set model_id via kwargs.setdefault — + # a value invisible pre-construction. Using _effective_model preserves # the existing 100-900s silent-hang protection for all code paths. - _maybe_load_expected_model(getattr(agent, "model_id", None) or model_id) + _maybe_load_expected_model(_effective_model(agent, model_id)) result = agent.process_query(request.message) if isinstance(result, dict): @@ -940,36 +975,20 @@ def _run_agent(): session_id[:8], ) t_construct = _time.monotonic() - create_kwargs = { - "streaming": True, - "silent_mode": False, - "debug": False, - } - if custom_model: - create_kwargs["model_id"] = custom_model - logger.info( - "create_agent: custom_model override -> %r (streaming)", - custom_model, - ) - elif model_id and model_id != _DB_DEFAULT_MODEL: - create_kwargs["model_id"] = model_id - logger.info( - "create_agent: session-explicit model -> %r (streaming)", - model_id, - ) - else: - logger.info( - "create_agent: omitting model_id kwarg (session at DB default %r); " - "agent's kwargs.setdefault or AgentConfig fallback will govern (streaming)", - _DB_DEFAULT_MODEL, - ) - agent = registry.create_agent(agent_type, **create_kwargs) + agent = registry.create_agent( + agent_type, + **_build_create_kwargs( + custom_model=custom_model, + model_id=model_id, + streaming=True, + ), + ) agent.console = sse_handler logger.info( "chat: Invoking agent %s for session %s, model=%s took=%.3fs", agent_type, session_id[:8], - getattr(agent, "model_id", model_id), + _effective_model(agent, model_id), _time.monotonic() - t_construct, ) @@ -982,7 +1001,7 @@ def _run_agent(): _store_agent( session_id, - getattr(agent, "model_id", None) or model_id, + _effective_model(agent, model_id), document_ids, agent, agent_type, @@ -1040,7 +1059,7 @@ def _run_agent(): # invisible pre-construction. Using agent.model_id preserves the existing # 100-900s silent-hang protection for all code paths including setdefault. _maybe_load_expected_model( - getattr(agent, "model_id", None) or model_id, sse_handler + _effective_model(agent, model_id), sse_handler ) # -- Phase 5: Query processing -- diff --git a/src/gaia/ui/database.py b/src/gaia/ui/database.py index a0d037e92..7305f6e8f 100644 --- a/src/gaia/ui/database.py +++ b/src/gaia/ui/database.py @@ -20,6 +20,10 @@ DEFAULT_DB_PATH = Path.home() / ".gaia" / "chat" / "gaia_chat.db" +# Default model for new sessions — kept in sync with the SQL schema DEFAULT and +# any code that reads session["model"] and falls back when the field is NULL. +SESSION_DEFAULT_MODEL = "Qwen3.5-35B-A3B-GGUF" + SCHEMA_SQL = """ -- Global document library CREATE TABLE IF NOT EXISTS documents ( @@ -230,7 +234,7 @@ def create_session( """Create a new chat session.""" session_id = str(uuid.uuid4()) now = self._now() - model = model or "Qwen3.5-35B-A3B-GGUF" + model = model or SESSION_DEFAULT_MODEL title = title or "New Chat" agent_type = agent_type or "chat" diff --git a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py index fb402e409..19da2467a 100644 --- a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py +++ b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py @@ -18,9 +18,7 @@ from pathlib import Path from unittest.mock import MagicMock, patch -# DB default must match the value used by gaia.ui.database.create_session -_DB_DEFAULT = "Qwen3.5-35B-A3B-GGUF" - +from gaia.ui.database import SESSION_DEFAULT_MODEL as _DB_DEFAULT # ── Helpers ────────────────────────────────────────────────────────────────── @@ -211,9 +209,10 @@ def test_no_direct_model_id_kwarg_in_create_agent_calls(self): import re src = (Path(__file__).parents[4] / "src/gaia/ui/_chat_helpers.py").read_text() - # Matches the old antipattern: create_agent(... model_id=model_id ...) - # Uses DOTALL so it catches multiline calls. - match = re.search(r"create_agent\([^)]*model_id=model_id", src, re.DOTALL) + # Matches the old antipattern: create_agent(... model_id=model_id ...) as a + # DIRECT kwarg (not inside a nested call like _build_create_kwargs). + # [^()]* stops at any parenthesis so nested helper calls aren't matched. + match = re.search(r"create_agent\([^()]*model_id=model_id", src, re.DOTALL) assert not match, ( "Issue #841 regression: registry.create_agent must not receive " "model_id=model_id as a direct kwarg. Build create_kwargs conditionally " From 8f5c7621ca3a1aeb79e046fafd7754ca4929e56d Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Wed, 22 Apr 2026 05:14:20 -0400 Subject: [PATCH 3/7] fix(ui): restore intent-key for agent cache store to fix miss regression (#842) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _store_agent was changed by the #842 fix to use _effective_model(agent, model_id) as the cache key — the post-construction value set by kwargs.setdefault. _get_cached_agent still looks up using the pre-construction model_id variable. For custom agents whose setdefault model differs from the session model, the keys never match and the agent is rebuilt on every turn. Revert the two _store_agent call sites to use model_id (the pre-construction intent key), matching what the lookup uses. _effective_model stays at the two _maybe_load_expected_model sites (Lemonade pre-flight needs the actual model) and in log statements (observability). Add two regression guards: - test_cache_hit_on_second_turn_for_setdefault_agent: two-turn cache-hit test with four assertions (call count, object identity, stored-key equality, agent.model_id). Covers the builder/template.py setdefault pattern. - test_no_effective_model_in_store_agent_calls: static grep guard that asserts _store_agent never receives _effective_model(...) as a positional arg, preventing this pattern from silently returning in a future cleanup pass. --- src/gaia/ui/_chat_helpers.py | 4 +- .../ui/test_chat_helpers_model_resolution.py | 80 +++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/src/gaia/ui/_chat_helpers.py b/src/gaia/ui/_chat_helpers.py index 2aeae8d6f..912a139e8 100644 --- a/src/gaia/ui/_chat_helpers.py +++ b/src/gaia/ui/_chat_helpers.py @@ -621,7 +621,7 @@ def _do_chat(): ) _store_agent( session_id, - _effective_model(agent, model_id), + model_id, document_ids, agent, agent_type, @@ -1001,7 +1001,7 @@ def _run_agent(): _store_agent( session_id, - _effective_model(agent, model_id), + model_id, document_ids, agent, agent_type, diff --git a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py index 19da2467a..775af47ef 100644 --- a/tests/unit/chat/ui/test_chat_helpers_model_resolution.py +++ b/tests/unit/chat/ui/test_chat_helpers_model_resolution.py @@ -190,6 +190,69 @@ def test_non_streaming_path_silent_mode_true_preserved(self): "kwargs", {} ), "Non-streaming path must not pass streaming=True to create_agent" + def test_cache_hit_on_second_turn_for_setdefault_agent(self): + """Cache regression guard for #842 fix: custom agents must hit the cache + on turn 2 even when their setdefault model differs from the session model. + + Pre-fix _store_agent used _effective_model(agent, model_id) (the + post-construction value, e.g. "SetdefaultChose-GGUF") as the cache key, + while _get_cached_agent looked up using the pre-construction model_id + (the DB default). The keys never matched → cache miss every turn. + + After the fix, _store_agent uses model_id (pre-construction intent) + and the keys agree regardless of what setdefault chose. + """ + import gaia.ui._chat_helpers as _helpers + from gaia.ui._chat_helpers import _get_chat_response + from gaia.ui.models import ChatRequest + + sid = "cache-test-session" + registry, _ = _make_registry(setdefault_model="SetdefaultChose-GGUF") + db = _make_db(custom_model=None) + session = dict(_make_session(model=_DB_DEFAULT)) + session["session_id"] = sid + + # Clear cache once; do NOT clear between turns (that's the whole point). + with _helpers._agent_cache_lock: + _helpers._agent_cache.clear() + + request = ChatRequest(session_id=sid, message="hi", stream=False) + + with ( + patch("gaia.ui._chat_helpers._agent_registry", registry), + patch("gaia.ui._chat_helpers._maybe_load_expected_model"), + ): + # Turn 1 — agent constructed, stored in cache. + _run_sync(_get_chat_response(db, session, request)) + first_agent = _helpers._agent_cache.get(sid, {}).get("agent") + + # Turn 2 — must hit the cache; no second create_agent call. + _run_sync(_get_chat_response(db, session, request)) + second_agent = _helpers._agent_cache.get(sid, {}).get("agent") + + # 1. Only one construction (cache hit on turn 2). + assert registry.create_agent.call_count == 1, ( + f"Cache regression: create_agent called {registry.create_agent.call_count} " + "times; expected 1 (turn 2 must be a cache hit, not a rebuild)" + ) + # 2. Object identity proves the cache returned the same agent. + assert second_agent is first_agent, ( + "Cache regression: turn 2 returned a different agent object — " + "cache hit must return the SAME instance, not a reconstructed one" + ) + # 3. Stored key is the pre-construction intent (the actual regression pin). + stored_model = _helpers._agent_cache.get(sid, {}).get("model_id") + assert stored_model == _DB_DEFAULT, ( + f"Cache regression: stored model_id={stored_model!r} must equal the " + f"pre-construction session model {_DB_DEFAULT!r}, not the agent's " + "post-setdefault value — otherwise lookup/store keys diverge" + ) + # 4. Agent's own model_id reflects what setdefault chose. + assert first_agent.model_id == "SetdefaultChose-GGUF", ( + f"Agent model_id={first_agent.model_id!r} must reflect kwargs.setdefault " + "value 'SetdefaultChose-GGUF'" + ) + class TestStaticRegressionGuard: """Source-level pin against reintroduction of the antipattern.""" @@ -220,6 +283,23 @@ def test_no_direct_model_id_kwarg_in_create_agent_calls(self): f"Match found at: {match.group()[:80]!r}" ) + def test_no_effective_model_in_store_agent_calls(self): + """Cache-key divergence guard for the #842 fix. _store_agent is the cache + STORE; its 2nd arg must be the pre-construction model_id so it matches + the lookup key used by _get_cached_agent. Passing _effective_model(...) + (post-construction) causes the store/lookup keys to diverge whenever the + agent's setdefault differs from the session model — agents rebuild every turn. + """ + import re + + src = (Path(__file__).parents[4] / "src/gaia/ui/_chat_helpers.py").read_text() + match = re.search(r"_store_agent\([^()]*_effective_model", src, re.DOTALL) + assert not match, ( + "Cache regression (#842): _store_agent must not receive _effective_model(...) " + "as a positional arg — store/lookup keys would diverge for setdefault agents. " + f"Match: {match.group()[:80]!r}" + ) + class TestPostConstructionPreflight: """Verify pre-flight uses agent.model_id (not pre-call model_id variable).""" From a0fdb10943c1d179708846ad58704a95195793c3 Mon Sep 17 00:00:00 2001 From: Kalin Ovtcharov Date: Mon, 20 Apr 2026 14:42:27 -0700 Subject: [PATCH 4/7] docs(plans): fix broken CMU link to EMNLP 2004 Email Speech Acts paper (#817) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary One-line fix: swap the failing `www.cs.cmu.edu/~tom/EMNLP2004_final.pdf` URL in `docs/plans/email-triage-agent.mdx:2601` for the canonical ACL Anthology record at [W04-3240](https://aclanthology.org/W04-3240/). The CMU URL fails DNS resolution in CI (see [recent run](https://github.com/amd/gaia/actions/runs/24595902571/job/72072156929)), breaking the ``Verify external URLs`` check for every open PR that touches docs. ACL Anthology is the permanent archive for ACL/EMNLP papers — stable URL, no more link rot. Also restored the paper's actual full title ("Learning to Classify Email into 'Speech Acts'") for consistency with the other full-title citations in the same references list. ## Test plan - [x] `curl -sI https://aclanthology.org/W04-3240/` returns 200 - [ ] After merge, `Verify external URLs` check should go green on downstream PRs --- docs/plans/email-triage-agent.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plans/email-triage-agent.mdx b/docs/plans/email-triage-agent.mdx index f515efd0e..95d71cd6d 100644 --- a/docs/plans/email-triage-agent.mdx +++ b/docs/plans/email-triage-agent.mdx @@ -2598,7 +2598,7 @@ Choices the spec implies but does not resolve: - [Whittaker & Sidner, "Email Overload" (CHI 1996)](https://dl.acm.org/doi/10.1145/238386.238530) - [Bellotti et al., "Taking Email to Task" / Taskmaster (CHI 2003)](https://www.semanticscholar.org/paper/Taking-email-to-task/8a28a1ee766d87ca9acbd741a7c1972d69217359) - [Aberdeen, Pacovsky & Slater, "Gmail Priority Inbox" (NIPS 2010)](https://research.google/pubs/pub36955/) -- [Cohen, Carvalho & Mitchell, "Email Speech Acts" (EMNLP 2004)](https://www.cs.cmu.edu/~tom/EMNLP2004_final.pdf) +- [Cohen, Carvalho & Mitchell, "Learning to Classify Email into 'Speech Acts'" (EMNLP 2004)](https://aclanthology.org/W04-3240/) - [Vellum, "Levels of Agentic Behavior"](https://www.vellum.ai/blog/levels-of-agentic-behavior) - [Knight Institute, "Levels of Autonomy for AI Agents"](https://knightcolumbia.org/content/levels-of-autonomy-for-ai-agents-1) From 3b51ca92c5ff18a962e629e5fb9c94afd258e9de Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Wed, 22 Apr 2026 18:21:52 -0400 Subject: [PATCH 5/7] style(mcp): apply Black formatting to mcp_bridge.py (CI lint fix) --- src/gaia/mcp/mcp_bridge.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gaia/mcp/mcp_bridge.py b/src/gaia/mcp/mcp_bridge.py index 91333f83d..9ee1468f9 100644 --- a/src/gaia/mcp/mcp_bridge.py +++ b/src/gaia/mcp/mcp_bridge.py @@ -628,7 +628,10 @@ def handle_jsonrpc(self, data): 400, { "jsonrpc": "2.0", - "error": {"code": -32600, "message": "Invalid Request: expected JSON object"}, + "error": { + "code": -32600, + "message": "Invalid Request: expected JSON object", + }, "id": None, }, ) From 28bf5f7906fff76da6c2274527342156570902f1 Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Thu, 23 Apr 2026 19:07:58 -0400 Subject: [PATCH 6/7] Release v0.17.4 Patch release: custom agents now honor their declared model, and the C++ library no longer crashes on null JSON fields from smaller LLMs. - Custom Agent UI agents honor kwargs.setdefault("model_id", ...) when the session is at the DB default (#841, follow-up #842 restores cache hits). - C++ library adds null-safety guards in parseLlmResponse() to tolerate smaller LLMs that return null for "tool" or "content" (#780). - Docs: swap broken CMU link for canonical ACL Anthology URL (#817). --- docs/docs.json | 3 +- docs/releases/v0.17.4.mdx | 61 ++++++++++++++++++++++++++++++++ src/gaia/apps/webui/package.json | 2 +- 3 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 docs/releases/v0.17.4.mdx diff --git a/docs/docs.json b/docs/docs.json index 582f5b27c..f00207b3b 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -403,6 +403,7 @@ "group": "Release Notes", "pages": [ "releases/index", + "releases/v0.17.4", "releases/v0.17.3", "releases/v0.17.2", "releases/v0.17.1", @@ -450,7 +451,7 @@ "navbar": { "links": [ { - "label": "v0.17.3 \u00b7 Lemonade 10.0.0", + "label": "v0.17.4 \u00b7 Lemonade 10.0.0", "href": "https://github.com/amd/gaia/releases" }, { diff --git a/docs/releases/v0.17.4.mdx b/docs/releases/v0.17.4.mdx new file mode 100644 index 000000000..b63972ca9 --- /dev/null +++ b/docs/releases/v0.17.4.mdx @@ -0,0 +1,61 @@ +--- +title: "v0.17.4" +description: "Patch release: custom agents now honor their declared model, and the C++ library no longer crashes on null JSON fields from smaller LLMs." +--- + +# GAIA v0.17.4 Release Notes + +GAIA v0.17.4 is a focused patch release that fixes two correctness bugs users hit in practice: custom Agent UI agents silently ignoring their declared model, and the C++ library crashing on tool-call responses from smaller LLMs. Upgrade if you build custom agents or run GAIA against quantized/smaller models. + +```bash +pip install --upgrade amd-gaia +``` + +**Why upgrade:** +- **Custom agents finally use the model they declare** — If your agent sets a model via `kwargs.setdefault("model_id", ...)`, the Agent UI now actually runs against that model when the user's session is at the DB default, instead of silently falling back to the session model. A follow-up fix also restores agent-cache hits on the second turn so custom-model agents aren't rebuilt every request. +- **No more crashes on smaller LLMs** — The C++ library previously crashed with `json.exception.type_error.302` when models like `qwen3.5:9b` returned `null` for `"tool"` or `"content"` fields. Those fields are now parsed safely. + +--- + +## What's New + +### Custom Agent `model_id` Respected in the Agent UI + +Previously, `_chat_helpers.py` always passed `model_id=` explicitly to `registry.create_agent()`, which defeated `kwargs.setdefault("model_id", ...)` in custom agents — `setdefault` only fires when the key is absent (PR [#841](https://github.com/amd/gaia/pull/841)). + +**What you can do:** +- Ship a custom agent whose template sets `kwargs.setdefault("model_id", "my-model")` and have the Agent UI actually use `my-model` when the session is at the DB default. +- Three-branch precedence for model selection is now explicit: `custom_model` setting > session-explicit model > agent's own `setdefault`. + +**Under the hood:** +- Extracted `_build_create_kwargs()` and `_effective_model()` helpers in `src/gaia/ui/_chat_helpers.py` to eliminate duplicate three-branch logic across streaming and non-streaming paths. +- `SESSION_DEFAULT_MODEL` now exported from `database.py` as the single source of truth. +- `_effective_model()` uses an explicit `None` check rather than `or`, so an empty string is not treated as a missing model. +- Follow-up PR [#842](https://github.com/amd/gaia/pull/842) restored the pre-construction `model_id` as the agent-cache key so lookups and stores match — custom-model agents now hit the cache on turn two instead of being rebuilt every request. Added a two-turn cache-hit regression test plus a static guard that asserts `_store_agent` never receives `_effective_model(...)` as a positional arg. + +--- + +### C++ Library: Null-Safety in LLM Response Parsing + +Added `.is_string()` / `.is_null()` guards before all `.get()` calls on LLM response JSON fields in `cpp/src/json_utils.cpp::parseLlmResponse()` (PR [#780](https://github.com/amd/gaia/pull/780)). Fixes a crash (`json.exception.type_error.302: type must be string, but is null`) when smaller models (e.g. `qwen3.5:9b`) return `null` for `"tool"` or `"content"` instead of omitting them. `json.contains()` returns `true` for `null` values, so the existing checks were insufficient. + +--- + +## Full Changelog + +- `8fc43f3f` — fix(cpp): add null-safety checks for JSON string fields in LLM response parsing (#780) +- `62722de2` — fix(ui): honor custom agent model_id when session is at DB default (#841) +- `4acfd400` — fix(ui): extract _build_create_kwargs/_effective_model, import SESSION_DEFAULT_MODEL +- `8f5c7621` — fix(ui): restore intent-key for agent cache store to fix miss regression (#842) +- `a0fdb109` — docs(plans): fix broken CMU link to EMNLP 2004 Email Speech Acts paper (#817) + +Full Changelog: [v0.17.3...v0.17.4](https://github.com/amd/gaia/compare/v0.17.3...v0.17.4) + +--- + +## Release checklist +- [x] `util/validate_release_notes.py docs/releases/v0.17.4.mdx --tag v0.17.4` passes +- [x] `src/gaia/version.py` → `0.17.4` +- [x] `src/gaia/apps/webui/package.json` → `0.17.4` +- [x] Navbar label in `docs/docs.json` → `v0.17.4 · Lemonade 10.0.0` +- [ ] Review from @kovtcharov-amd addressed diff --git a/src/gaia/apps/webui/package.json b/src/gaia/apps/webui/package.json index 14a942e9b..9c4e827d0 100644 --- a/src/gaia/apps/webui/package.json +++ b/src/gaia/apps/webui/package.json @@ -1,6 +1,6 @@ { "name": "@amd-gaia/agent-ui", - "version": "0.17.3", + "version": "0.17.4", "type": "module", "productName": "GAIA Agent UI", "description": "Privacy-first agentic AI interface with document Q&A - runs 100% locally on AMD Ryzen AI", From 1e19ce16344813cb587dc16c5fc1ad1ee9ea9f99 Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Thu, 23 Apr 2026 19:10:47 -0400 Subject: [PATCH 7/7] docs(release): tone down v0.17.4 notes, drop install block, match v0.17.1 style --- docs/releases/v0.17.4.mdx | 43 +++++++++++++++------------------------ 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/docs/releases/v0.17.4.mdx b/docs/releases/v0.17.4.mdx index b63972ca9..770a16c9b 100644 --- a/docs/releases/v0.17.4.mdx +++ b/docs/releases/v0.17.4.mdx @@ -1,19 +1,15 @@ --- title: "v0.17.4" -description: "Patch release: custom agents now honor their declared model, and the C++ library no longer crashes on null JSON fields from smaller LLMs." +description: "Custom-agent model selection, C++ null-safety, and docs link fix" --- # GAIA v0.17.4 Release Notes -GAIA v0.17.4 is a focused patch release that fixes two correctness bugs users hit in practice: custom Agent UI agents silently ignoring their declared model, and the C++ library crashing on tool-call responses from smaller LLMs. Upgrade if you build custom agents or run GAIA against quantized/smaller models. - -```bash -pip install --upgrade amd-gaia -``` +GAIA v0.17.4 is a patch release covering two correctness fixes in the Agent UI custom-agent path, a null-safety fix in the C++ library for smaller LLMs, and a broken docs citation. **Why upgrade:** -- **Custom agents finally use the model they declare** — If your agent sets a model via `kwargs.setdefault("model_id", ...)`, the Agent UI now actually runs against that model when the user's session is at the DB default, instead of silently falling back to the session model. A follow-up fix also restores agent-cache hits on the second turn so custom-model agents aren't rebuilt every request. -- **No more crashes on smaller LLMs** — The C++ library previously crashed with `json.exception.type_error.302` when models like `qwen3.5:9b` returned `null` for `"tool"` or `"content"` fields. Those fields are now parsed safely. +- **Custom agents use their declared model** — If a custom agent sets a model via `kwargs.setdefault("model_id", ...)`, the Agent UI now respects that setting when the session is at the DB default, instead of falling back to the session model. +- **Compatibility with smaller LLMs in the C++ library** — The C++ JSON parser now tolerates `null` values in `"tool"` and `"content"` fields, which some smaller models emit in place of omitting the field. --- @@ -21,28 +17,30 @@ pip install --upgrade amd-gaia ### Custom Agent `model_id` Respected in the Agent UI -Previously, `_chat_helpers.py` always passed `model_id=` explicitly to `registry.create_agent()`, which defeated `kwargs.setdefault("model_id", ...)` in custom agents — `setdefault` only fires when the key is absent (PR [#841](https://github.com/amd/gaia/pull/841)). +`_chat_helpers.py` previously passed `model_id=` explicitly to `registry.create_agent()`, which defeated `kwargs.setdefault("model_id", ...)` in custom agents — `setdefault` only fires when the key is absent (PR [#841](https://github.com/amd/gaia/pull/841)). The Agent UI now builds `create_kwargs` conditionally, omitting `model_id` when the session is at the DB default so the agent's `__init__` setdefault governs. Three-branch precedence is now explicit: `custom_model` setting > session-explicit model > agent's own `setdefault`. -**What you can do:** -- Ship a custom agent whose template sets `kwargs.setdefault("model_id", "my-model")` and have the Agent UI actually use `my-model` when the session is at the DB default. -- Three-branch precedence for model selection is now explicit: `custom_model` setting > session-explicit model > agent's own `setdefault`. +A follow-up fix (PR [#842](https://github.com/amd/gaia/pull/842)) restored the pre-construction `model_id` as the agent-cache key. The initial PR #841 landing had switched `_store_agent` to use the post-construction `_effective_model(agent, model_id)` while `_get_cached_agent` still looked up with `model_id`, so keys never matched for custom-model agents and the agent was rebuilt on every turn. A two-turn cache-hit regression test and a static guard on `_store_agent` call sites were added alongside the fix. -**Under the hood:** -- Extracted `_build_create_kwargs()` and `_effective_model()` helpers in `src/gaia/ui/_chat_helpers.py` to eliminate duplicate three-branch logic across streaming and non-streaming paths. -- `SESSION_DEFAULT_MODEL` now exported from `database.py` as the single source of truth. -- `_effective_model()` uses an explicit `None` check rather than `or`, so an empty string is not treated as a missing model. -- Follow-up PR [#842](https://github.com/amd/gaia/pull/842) restored the pre-construction `model_id` as the agent-cache key so lookups and stores match — custom-model agents now hit the cache on turn two instead of being rebuilt every request. Added a two-turn cache-hit regression test plus a static guard that asserts `_store_agent` never receives `_effective_model(...)` as a positional arg. +Supporting refactor: extracted `_build_create_kwargs()` and `_effective_model()` helpers in `src/gaia/ui/_chat_helpers.py` to deduplicate the three-branch logic across streaming and non-streaming paths, and exported `SESSION_DEFAULT_MODEL` from `database.py` as the single source of truth. --- ### C++ Library: Null-Safety in LLM Response Parsing -Added `.is_string()` / `.is_null()` guards before all `.get()` calls on LLM response JSON fields in `cpp/src/json_utils.cpp::parseLlmResponse()` (PR [#780](https://github.com/amd/gaia/pull/780)). Fixes a crash (`json.exception.type_error.302: type must be string, but is null`) when smaller models (e.g. `qwen3.5:9b`) return `null` for `"tool"` or `"content"` instead of omitting them. `json.contains()` returns `true` for `null` values, so the existing checks were insufficient. +`parseLlmResponse()` in `cpp/src/json_utils.cpp` now guards `.get()` calls on the `"tool"` and `"answer"` JSON fields with `.is_string()` / `.is_null()` checks (PR [#780](https://github.com/amd/gaia/pull/780)). This fixes a crash (`json.exception.type_error.302: type must be string, but is null`) when smaller LLMs (for example `qwen3.5:9b`) return `null` for those fields instead of omitting them. `json.contains()` returns `true` for `null` values, so the existing presence checks were insufficient. + +--- + +## Bug Fixes + +- **Email-triage agent plan: broken CMU citation link** (PR [#817](https://github.com/amd/gaia/pull/817)) — Swapped the failing `www.cs.cmu.edu/~tom/EMNLP2004_final.pdf` URL in `docs/plans/email-triage-agent.mdx` for the canonical ACL Anthology record at [W04-3240](https://aclanthology.org/W04-3240/). The CMU URL was failing DNS resolution in CI, breaking the `Verify external URLs` check on every open docs PR. Restored the paper's full title ("Learning to Classify Email into 'Speech Acts'") for consistency with other citations in the same references list. --- ## Full Changelog +**5 commits** since v0.17.3: + - `8fc43f3f` — fix(cpp): add null-safety checks for JSON string fields in LLM response parsing (#780) - `62722de2` — fix(ui): honor custom agent model_id when session is at DB default (#841) - `4acfd400` — fix(ui): extract _build_create_kwargs/_effective_model, import SESSION_DEFAULT_MODEL @@ -50,12 +48,3 @@ Added `.is_string()` / `.is_null()` guards before all `.get()` call - `a0fdb109` — docs(plans): fix broken CMU link to EMNLP 2004 Email Speech Acts paper (#817) Full Changelog: [v0.17.3...v0.17.4](https://github.com/amd/gaia/compare/v0.17.3...v0.17.4) - ---- - -## Release checklist -- [x] `util/validate_release_notes.py docs/releases/v0.17.4.mdx --tag v0.17.4` passes -- [x] `src/gaia/version.py` → `0.17.4` -- [x] `src/gaia/apps/webui/package.json` → `0.17.4` -- [x] Navbar label in `docs/docs.json` → `v0.17.4 · Lemonade 10.0.0` -- [ ] Review from @kovtcharov-amd addressed