From 11e5798cc22ee5d158a61b66908f4b60855853cd Mon Sep 17 00:00:00 2001 From: Greg Holmes Date: Tue, 12 May 2026 10:29:30 +0100 Subject: [PATCH] fix: lowercase bool query params on websocket connect urllib.parse.urlencode stringifies Python bools via str() to "True"/"False", which the API rejects with HTTP 400. The four streaming connect paths (listen v1/v2, speak v1, agent v1) all hit this; HTTP raw clients are unaffected because httpx lowercases bools itself. Coerce True/False to "true"/"false" in core/query_encoder.py before urlencode runs. Covers scalar params, list-of-scalars, and bools nested in dict/list query values. Freeze the file via .fernignore and add a regression test pinning the behavior at the encoder layer. --- .fernignore | 8 ++++ AGENTS.md | 2 + src/deepgram/core/query_encoder.py | 16 +++++-- tests/custom/test_query_encoder.py | 67 ++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 tests/custom/test_query_encoder.py diff --git a/.fernignore b/.fernignore index 9752a0aa..a7f67968 100644 --- a/.fernignore +++ b/.fernignore @@ -61,9 +61,17 @@ src/deepgram/agent/v1/requests/__init__.py src/deepgram/types/__init__.py src/deepgram/requests/__init__.py +# Coerces Python bools to lowercase "true"/"false" before urlencode, which +# otherwise stringifies via str() and produces "True"/"False", values that +# Deepgram's websocket endpoints reject. HTTP raw_clients hand params to httpx +# directly and are unaffected; httpx accepts the pre-stringified values fine. 
+# [temporarily frozen — manual patches listed above] +src/deepgram/core/query_encoder.py + # Hand-written custom tests tests/custom/test_agent_history.py tests/custom/test_compat_aliases.py +tests/custom/test_query_encoder.py tests/custom/test_text_builder.py tests/custom/test_transport.py tests/typecheck/compat_aliases.py diff --git a/AGENTS.md b/AGENTS.md index fbc80850..da59ccba 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -30,6 +30,7 @@ Current permanently frozen files: - `src/deepgram/transport_interface.py`, `src/deepgram/transport.py`, `src/deepgram/transports/` — custom transport layer - `tests/custom/test_agent_history.py` — hand-written regression test for Agent History websocket payload parsing - `tests/custom/test_compat_aliases.py` — hand-written regression test for backward-compatible alias imports after regen renames +- `tests/custom/test_query_encoder.py` — hand-written regression test that `core/query_encoder.py` coerces Python bools to lowercase `"true"`/`"false"` before `urlencode` so websocket query strings stay wire-correct - `tests/custom/test_text_builder.py`, `tests/custom/test_transport.py` — hand-written tests - `tests/typecheck/compat_aliases.py` — hand-written mypy `assert_type` coverage for backward-compatible alias TypedDicts - `tests/manual/` — manual standalone tests @@ -52,6 +53,7 @@ Current temporarily frozen files: - `src/deepgram/agent/v1/socket_client.py` — same + `_sanitize_numeric_types` - `src/deepgram/agent/v1/types/agent_v1settings_agent_context.py`, `src/deepgram/agent/v1/types/agent_v1settings_agent.py`, `src/deepgram/agent/v1/types/agent_v1settings.py`, `src/deepgram/agent/v1/requests/agent_v1settings_agent_context.py`, `src/deepgram/agent/v1/requests/agent_v1settings_agent.py`, `src/deepgram/agent/v1/requests/agent_v1settings.py` — backward-compat patches for the 2026-05-05 Agent Settings schema restructure. 
These preserve callable `AgentV1SettingsAgent(...)`, keep `AgentV1Settings.agent` accepting both that wrapper and `agent_id` strings, restore the legacy request TypedDict shapes, remap legacy `messages=[...]` / nested `context=AgentV1SettingsAgentContext(messages=[...])` usage into the new `context={"messages": [...]}` wire shape, and keep read-side `obj.messages` access working. - `src/deepgram/agent/v1/types/agent_v1settings_audio_output.py` — keeps `audio.output.container` typed as `str` for backward compatibility instead of the regenerated enum alias +- `src/deepgram/core/query_encoder.py` — coerces Python bools to lowercase `"true"`/`"false"` before they reach `urllib.parse.urlencode` (which would otherwise produce `"True"`/`"False"` via `str()` and break websocket query strings). Only the four `*/connect()` paths call `urlencode`; HTTP raw clients hand params to httpx, which lowercases bools itself and accepts the pre-stringified values, so the patch does not change what the HTTP path sends on the wire. Once Fern's websocket codegen normalizes bools (or the spec types these as `boolean` end-to-end), this can be unfrozen. - `tests/wire/test_manage_v1_projects_keys.py` — restored wire coverage for the legacy `CreateKeyV1RequestOneParams` request alias so future regens do not silently drop that compatibility check - `src/deepgram/__init__.py`, `src/deepgram/agent/__init__.py`, `src/deepgram/agent/v1/__init__.py`, `src/deepgram/agent/v1/types/__init__.py`, `src/deepgram/agent/v1/requests/__init__.py`, `src/deepgram/types/__init__.py`, `src/deepgram/requests/__init__.py` — package `__init__.py` files carrying hand-applied legacy alias re-exports for `CreateKeyV1RequestOne`, `AgentV1HistoryContent`, `AgentV1HistoryFunctionCalls`, `AgentV1SettingsAgentContextMessagesItemContent`, `AgentV1SettingsAgentContextMessagesItemFunctionCalls` (and their `*Params` variants). Fern would otherwise regenerate these and strip the legacy entries. 
After unfreezing for the next regen and reviewing the new generated content, re-apply the legacy re-exports plus any genuine new entries Fern added. diff --git a/src/deepgram/core/query_encoder.py b/src/deepgram/core/query_encoder.py index 3183001d..6b81f9bc 100644 --- a/src/deepgram/core/query_encoder.py +++ b/src/deepgram/core/query_encoder.py @@ -5,6 +5,14 @@ import pydantic +def _coerce_query_value(value: Any) -> Any: + # urllib.parse.urlencode stringifies bools via str(), producing "True"/"False"; + # APIs (including Deepgram's websocket endpoints) expect lowercase. + if isinstance(value, bool): + return "true" if value else "false" + return value + + # Flattens dicts to be of the form {"key[subkey][subkey2]": value} where value is not a dict def traverse_query_dict(dict_flat: Dict[str, Any], key_prefix: Optional[str] = None) -> List[Tuple[str, Any]]: result = [] @@ -17,9 +25,9 @@ def traverse_query_dict(dict_flat: Dict[str, Any], key_prefix: Optional[str] = N if isinstance(arr_v, dict): result.extend(traverse_query_dict(arr_v, key)) else: - result.append((key, arr_v)) + result.append((key, _coerce_query_value(arr_v))) else: - result.append((key, v)) + result.append((key, _coerce_query_value(v))) return result @@ -41,11 +49,11 @@ def single_query_encoder(query_key: str, query_value: Any) -> List[Tuple[str, An encoded_values.extend(single_query_encoder(query_key, obj_dict)) else: - encoded_values.append((query_key, value)) + encoded_values.append((query_key, _coerce_query_value(value))) return encoded_values - return [(query_key, query_value)] + return [(query_key, _coerce_query_value(query_value))] def encode_query(query: Optional[Dict[str, Any]]) -> Optional[List[Tuple[str, Any]]]: diff --git a/tests/custom/test_query_encoder.py b/tests/custom/test_query_encoder.py new file mode 100644 index 00000000..93a982e2 --- /dev/null +++ b/tests/custom/test_query_encoder.py @@ -0,0 +1,67 @@ +"""Regression tests for query_encoder boolean coercion. 
+ +Python's str(True) returns "True" (capitalized), and urllib.parse.urlencode +falls back to str() for scalar values. The Deepgram API rejects "True"/"False" +on websocket query strings, so query_encoder coerces booleans to lowercase +before they reach urlencode. +""" + +import urllib.parse + +from deepgram.core.query_encoder import encode_query, single_query_encoder + + +class TestBoolCoercion: + def test_top_level_true_becomes_lowercase(self): + assert single_query_encoder("diarize", True) == [("diarize", "true")] + + def test_top_level_false_becomes_lowercase(self): + assert single_query_encoder("diarize", False) == [("diarize", "false")] + + def test_encode_query_lowercases_bools(self): + result = encode_query({"diarize": True, "smart_format": False, "model": "nova-3"}) + assert result is not None + assert ("diarize", "true") in result + assert ("smart_format", "false") in result + assert ("model", "nova-3") in result + + def test_urlencode_roundtrip_produces_lowercase(self): + encoded = encode_query({"diarize": True, "smart_format": False}) + assert encoded is not None + query_string = urllib.parse.urlencode(encoded) + assert "diarize=true" in query_string + assert "smart_format=false" in query_string + assert "True" not in query_string + assert "False" not in query_string + + def test_bools_in_list_value_coerced(self): + result = single_query_encoder("flags", [True, False]) + assert result == [("flags", "true"), ("flags", "false")] + + def test_bools_in_nested_dict_coerced(self): + result = single_query_encoder("opts", {"a": True, "b": False}) + assert ("opts[a]", "true") in result + assert ("opts[b]", "false") in result + + def test_bools_in_nested_list_of_dicts_coerced(self): + result = single_query_encoder("items", [{"flag": True}]) + assert ("items[flag]", "true") in result + + +class TestNonBoolValuesPreserved: + def test_int_preserved(self): + assert single_query_encoder("count", 5) == [("count", 5)] + + def test_string_preserved(self): + assert 
single_query_encoder("name", "foo") == [("name", "foo")] + + def test_float_preserved(self): + assert single_query_encoder("rate", 1.5) == [("rate", 1.5)] + + def test_string_true_preserved(self): + assert single_query_encoder("diarize", "true") == [("diarize", "true")] + + def test_int_one_not_coerced_to_bool(self): + # bool is a subclass of int — make sure we don't accidentally coerce 1/0. + assert single_query_encoder("count", 1) == [("count", 1)] + assert single_query_encoder("count", 0) == [("count", 0)]