From fb00d939f90ea61673df938d7a7e5287fb5cbd65 Mon Sep 17 00:00:00 2001 From: Tyler Longwell <109685178+tlongwell-block@users.noreply.github.com> Date: Sat, 16 May 2026 20:09:33 -0400 Subject: [PATCH 1/3] feat(agent): add OpenAI Responses API with auto endpoint detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI's GPT-5 / o-series models on api.openai.com require the Responses API (/v1/responses) for tool calling; the legacy Chat Completions endpoint rejects them. OpenAI-compatible servers (vLLM, Ollama, llama.cpp, OpenRouter, Block Gateway, Databricks) almost all still speak only Chat Completions. This change teaches sprout-agent both dialects and routes between them automatically. New env: OPENAI_COMPAT_API={auto,chat,responses}, default auto. auto picks Responses for *.openai.com hosts, Chat Completions for everything else. Operators can pin the choice explicitly. Implementation: - config.rs: OpenAiApi enum + parse_openai_api_env() + auto_openai_api() with a small zero-dep host extractor. Lookalike-safe (`.openai.com` suffix match, not substring). - llm.rs: Provider::OpenAi now dispatches on cfg.openai_api. New responses_body / parse_responses pair handles the Responses wire shape (flat tool schema, input[] of typed items, max_output_tokens, output[] walk with reasoning-item skip). Serializer emits each prior assistant function_call before its function_call_output — the API rejects with "No tool call found for call_id ..." otherwise. - README.md: provider table updated, new env documented. Tests (11 new, all passing): - responses_body shape: instructions/max_output_tokens/flat tools - replay ordering invariant (function_call before function_call_output) - empty-assistant text skipped, tool_calls still serialized - image tool result → trailing input_image user message - parse: end_turn / tool_use / max_output_tokens branches - parse: rejects malformed function_call.arguments JSON - auto-detection: official OpenAI → Responses; vLLM/Ollama/OpenRouter/ Block Gateway/malformed → Chat Completions; lookalike host (api.openai.com.evil.example) → Chat Completions cargo fmt + cargo clippy --all-targets -D warnings clean. Live smoke against api.openai.com with gpt-5-mini: plain prompt, tool-roundtrip (dev__shell), and explicit chat-mode fallback all return stopReason=end_turn. See scripts at ~/scratch/sprout-agent-demos/test_openai_responses_smoke.py (out-of-tree). Signed-off-by: Tyler Longwell <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-agent/README.md | 21 +- crates/sprout-agent/src/config.rs | 103 +++++++ crates/sprout-agent/src/llm.rs | 455 ++++++++++++++++++++++++++++-- 3 files changed, 548 insertions(+), 31 deletions(-) diff --git a/crates/sprout-agent/README.md b/crates/sprout-agent/README.md index e3b48302..5a6c2cf7 100644 --- a/crates/sprout-agent/README.md +++ b/crates/sprout-agent/README.md @@ -131,6 +131,7 @@ Everything is environment variables. No flags, no config files. (We are a subpro | `OPENAI_COMPAT_API_KEY` | — | Required when provider=openai. | | `OPENAI_COMPAT_MODEL` | — | Required when provider=openai. | | `OPENAI_COMPAT_BASE_URL` | `https://api.openai.com/v1` | Point at vLLM, llama.cpp, OpenRouter, Ollama, etc. | +| `OPENAI_COMPAT_API` | `auto` | `auto` \| `chat` \| `responses`. `auto` picks Responses for `*.openai.com`, Chat Completions everywhere else. | | `SPROUT_AGENT_SYSTEM_PROMPT` | built-in | Inline system prompt. | | `SPROUT_AGENT_SYSTEM_PROMPT_FILE` | — | File path. Mutually exclusive with the above. | | `SPROUT_AGENT_MAX_ROUNDS` | `0` | Tool-loop iteration cap. 0 = unlimited. | @@ -147,17 +148,19 @@ Everything is environment variables. No flags, no config files. (We are a subpro `sprout-agent` speaks two HTTP dialects. Pick with `SPROUT_AGENT_PROVIDER`. -| Provider | `SPROUT_AGENT_PROVIDER` | Endpoint | Tested with | +| Provider | `SPROUT_AGENT_PROVIDER` | Endpoint (auto) | Tested with | |---|---|---|---| | Anthropic | `anthropic` | `POST {base}/v1/messages` | claude-sonnet-4-5, claude-opus-4 | -| OpenAI | `openai` | `POST {base}/chat/completions` | gpt-5, gpt-4o | -| vLLM | `openai` | OpenAI-compatible endpoint | any tool-calling model | -| llama.cpp | `openai` | `--api-server` mode | any tool-calling GGUF | -| Ollama | `openai` | `http://localhost:11434/v1` | llama3.1, qwen2.5-coder | -| OpenRouter | `openai` | `https://openrouter.ai/api/v1` | anything they route | -| Block Gateway | `openai` | internal | gpt-5, claude | - -The "OpenAI" path is wire-compatible with the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat). If a provider claims OpenAI compatibility and supports `tools` + `tool_choice: auto`, it works here. +| OpenAI | `openai` | `POST {base}/responses` | gpt-5, gpt-5-mini, o4-mini, gpt-4o | +| vLLM | `openai` | `POST {base}/chat/completions` | any tool-calling model | +| llama.cpp | `openai` | `POST {base}/chat/completions` | any tool-calling GGUF | +| Ollama | `openai` | `POST {base}/chat/completions` | llama3.1, qwen2.5-coder | +| OpenRouter | `openai` | `POST {base}/chat/completions` | anything they route | +| Block Gateway | `openai` | `POST {base}/chat/completions` | gpt-5, claude | + +`provider=openai` speaks two HTTP dialects: the [Responses API](https://platform.openai.com/docs/api-reference/responses) (`/v1/responses`, required for GPT-5 / o-series tool-calling on OpenAI's own service) and the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat) (`/chat/completions`, the broadly-supported OpenAI-compatible wire format). + +By default (`OPENAI_COMPAT_API=auto`) the agent picks **Responses** when `OPENAI_COMPAT_BASE_URL` points at an `*.openai.com` host and **Chat Completions** everywhere else. Pin the choice explicitly with `OPENAI_COMPAT_API=chat` or `OPENAI_COMPAT_API=responses` for providers that diverge from the default (e.g. a Responses-compatible self-hosted gateway). `Provider` is a Rust `enum` with one `match` in `Llm::complete`. There is no trait, no `Box`, no async-trait. Adding a third provider is a `match` arm and one `body`/`parse` pair in `llm.rs`. diff --git a/crates/sprout-agent/src/config.rs b/crates/sprout-agent/src/config.rs index 923b69e9..08827694 100644 --- a/crates/sprout-agent/src/config.rs +++ b/crates/sprout-agent/src/config.rs @@ -28,6 +28,18 @@ pub enum Provider { OpenAi, } +/// Which OpenAI-family HTTP API to call when `provider = OpenAi`. +/// +/// `Auto` (the default) picks per `base_url`: official OpenAI gets the +/// Responses API (`/v1/responses`), everything else (vLLM, Ollama, llama.cpp, +/// OpenRouter, etc.) gets Chat Completions (`/chat/completions`). Operators +/// can pin the choice with `OPENAI_COMPAT_API={chat,responses,auto}`. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum OpenAiApi { + ChatCompletions, + Responses, +} + #[derive(Debug, Clone)] pub struct Config { pub provider: Provider, @@ -57,6 +69,10 @@ pub struct Config { pub model: String, pub base_url: String, pub anthropic_api_version: String, + /// Which OpenAI-family HTTP API to call. Ignored when `provider = + /// Anthropic`. Set via `OPENAI_COMPAT_API`; defaults to auto-select + /// based on `base_url`. + pub openai_api: OpenAiApi, } impl Config { @@ -78,6 +94,7 @@ impl Config { env_or("OPENAI_COMPAT_BASE_URL", "https://api.openai.com/v1"), ), }; + let openai_api = parse_openai_api_env(&base_url)?; let system_prompt = match (env("SPROUT_AGENT_SYSTEM_PROMPT"), env("SPROUT_AGENT_SYSTEM_PROMPT_FILE")) { (Some(_), Some(_)) => return Err( "config: SPROUT_AGENT_SYSTEM_PROMPT and SPROUT_AGENT_SYSTEM_PROMPT_FILE are mutually exclusive".into()), @@ -92,6 +109,7 @@ impl Config { model, base_url, anthropic_api_version: env_or("ANTHROPIC_API_VERSION", "2023-06-01"), + openai_api, max_rounds: parse_env("SPROUT_AGENT_MAX_ROUNDS", 0)?, max_output_tokens: parse_env("SPROUT_AGENT_MAX_OUTPUT_TOKENS", 32_768)?, llm_timeout: Duration::from_secs(parse_env("SPROUT_AGENT_LLM_TIMEOUT_SECS", 120)?), @@ -182,6 +200,47 @@ fn req(k: &str) -> Result { env(k).ok_or_else(|| format!("config: {k} required")) } +/// Parse `OPENAI_COMPAT_API` and, on `auto` (or unset), pick a default +/// from `base_url`. Official OpenAI gets Responses; everything else gets +/// Chat Completions. +fn parse_openai_api_env(base_url: &str) -> Result { + let raw = env("OPENAI_COMPAT_API").unwrap_or_else(|| "auto".into()); + match raw.to_ascii_lowercase().as_str() { + "chat" | "chat_completions" | "chat-completions" => Ok(OpenAiApi::ChatCompletions), + "responses" => Ok(OpenAiApi::Responses), + "auto" | "" => Ok(auto_openai_api(base_url)), + other => Err(format!( + "config: OPENAI_COMPAT_API={other} not supported (use auto|chat|responses)" + )), + } +} + +/// Auto-selection rule. Hosts on `*.openai.com` get Responses; anything +/// else (vLLM, Ollama, llama.cpp, OpenRouter, Block Gateway, …) gets +/// Chat Completions, which remains the broadly-supported wire format +/// across the OpenAI-compatible ecosystem. +fn auto_openai_api(base_url: &str) -> OpenAiApi { + if base_url_host(base_url) + .map(|h| h == "api.openai.com" || h.ends_with(".openai.com")) + .unwrap_or(false) + { + OpenAiApi::Responses + } else { + OpenAiApi::ChatCompletions + } +} + +/// Cheap host extractor for `http(s)://host[:port]/...` style URLs. +/// Returns `None` for malformed input — caller treats that as "not +/// official OpenAI" which falls back to Chat Completions. +fn base_url_host(base_url: &str) -> Option<&str> { + let rest = base_url + .strip_prefix("https://") + .or_else(|| base_url.strip_prefix("http://"))?; + let end = rest.find(['/', ':']).unwrap_or(rest.len()); + Some(&rest[..end]) +} + fn parse_env(key: &str, default: T) -> Result where T::Err: std::fmt::Display, @@ -337,4 +396,48 @@ mod tests { // expectation for callers. assert!(hs.allows("*")); } + + #[test] + fn auto_openai_api_picks_responses_for_official_openai() { + assert_eq!( + auto_openai_api("https://api.openai.com/v1"), + OpenAiApi::Responses + ); + assert_eq!( + auto_openai_api("https://api.openai.com"), + OpenAiApi::Responses + ); + } + + #[test] + fn auto_openai_api_picks_chat_for_third_parties() { + // OpenAI-compatible servers: vLLM, Ollama, llama.cpp, OpenRouter, + // Block Gateway, Databricks, anything self-hosted. They mostly do + // not implement /v1/responses, so Chat Completions is the safe + // default. + for url in [ + "http://localhost:11434/v1", // Ollama + "http://127.0.0.1:8000/v1", // vLLM / llama.cpp + "https://openrouter.ai/api/v1", // OpenRouter + "https://gateway.block.example/v1", // Block Gateway + "https://my-vllm.k8s.example.com:443", // self-hosted vLLM + "not a url", // malformed → safe fallback + ] { + assert_eq!( + auto_openai_api(url), + OpenAiApi::ChatCompletions, + "expected Chat Completions for {url}" + ); + } + } + + #[test] + fn auto_openai_api_does_not_match_lookalike_hosts() { + // Defense against host-suffix mistakes: api.openai.com.evil.com + // is NOT openai.com. + assert_eq!( + auto_openai_api("https://api.openai.com.evil.example/v1"), + OpenAiApi::ChatCompletions + ); + } } diff --git a/crates/sprout-agent/src/llm.rs b/crates/sprout-agent/src/llm.rs index ddf85f63..3cb32f27 100644 --- a/crates/sprout-agent/src/llm.rs +++ b/crates/sprout-agent/src/llm.rs @@ -1,7 +1,7 @@ use reqwest::Client; use serde_json::{json, Value}; -use crate::config::{Config, Provider}; +use crate::config::{Config, OpenAiApi, Provider}; use crate::types::{ AgentError, HistoryItem, LlmResponse, ProviderStop, ToolCall, ToolDef, ToolResultContent, }; @@ -40,12 +40,20 @@ impl Llm { .await?; parse_anthropic(v) } - Provider::OpenAi => { - let body = openai_body(cfg, history, tools); - let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - parse_openai(v) - } + Provider::OpenAi => match cfg.openai_api { + OpenAiApi::ChatCompletions => { + let body = openai_body(cfg, history, tools); + let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); + let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; + parse_openai(v) + } + OpenAiApi::Responses => { + let body = responses_body(cfg, history, tools); + let url = format!("{}/responses", cfg.base_url.trim_end_matches('/')); + let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; + parse_responses(v) + } + }, } } @@ -75,20 +83,33 @@ impl Llm { .await?; Ok(parse_anthropic(v)?.text) } - Provider::OpenAi => { - let body = json!({ - "model": cfg.model, - "stream": false, - "max_completion_tokens": max_output_tokens, - "messages": [ - { "role": "system", "content": system_prompt }, - { "role": "user", "content": user_prompt }, - ], - }); - let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - Ok(parse_openai(v)?.text) - } + Provider::OpenAi => match cfg.openai_api { + OpenAiApi::ChatCompletions => { + let body = json!({ + "model": cfg.model, + "stream": false, + "max_completion_tokens": max_output_tokens, + "messages": [ + { "role": "system", "content": system_prompt }, + { "role": "user", "content": user_prompt }, + ], + }); + let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); + let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; + Ok(parse_openai(v)?.text) + } + OpenAiApi::Responses => { + let body = json!({ + "model": cfg.model, + "max_output_tokens": max_output_tokens, + "instructions": system_prompt, + "input": user_prompt, + }); + let url = format!("{}/responses", cfg.base_url.trim_end_matches('/')); + let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; + Ok(parse_responses(v)?.text) + } + }, } } } @@ -241,6 +262,198 @@ fn openai_image_user_content(content: &[ToolResultContent]) -> Vec { .collect() } +// ── OpenAI Responses API ─────────────────────────────────────────────────── +// +// Wire shape (model-facing, see https://platform.openai.com/docs/api-reference/responses): +// +// { +// "model": "...", +// "instructions": "", +// "max_output_tokens": N, +// "input": [, ...], +// "tools": [, ...] // flat schema, no nested `function: {…}` +// } +// +// `input_item` is one of: +// { "role": "user"|"assistant", "content": [{"type":"input_text"|"output_text", "text": …}] } +// { "type": "function_call", "call_id": …, "name": …, "arguments": "" } +// { "type": "function_call_output", "call_id": …, "output": "" } +// +// On replay (next turn) the prior assistant `function_call` items **must** +// precede their `function_call_output`s, otherwise the API rejects with +// "No tool call found for call_id ...". The serializer below emits them in +// the order they appear in `history`, which matches the order the agent +// loop produced them. +// +// Image tool results: Responses accepts image parts on user messages +// (`{type: "input_image", image_url: "data:..."}`), so we attach them on a +// trailing user message after the textual `function_call_output`, mirroring +// the Chat Completions branch. + +fn responses_body(cfg: &Config, history: &[HistoryItem], tools: &[ToolDef]) -> Value { + let mut input: Vec = Vec::with_capacity(history.len()); + for item in history { + match item { + HistoryItem::User(text) => input.push(json!({ + "role": "user", + "content": [{ "type": "input_text", "text": text }], + })), + HistoryItem::Assistant { text, tool_calls } => { + if !text.is_empty() { + input.push(json!({ + "role": "assistant", + "content": [{ "type": "output_text", "text": text }], + })); + } + for c in tool_calls { + input.push(json!({ + "type": "function_call", + "call_id": c.provider_id, + "name": c.name, + "arguments": serde_json::to_string(&c.arguments) + .unwrap_or_else(|_| "{}".into()), + })); + } + } + HistoryItem::ToolResult(r) => { + input.push(json!({ + "type": "function_call_output", + "call_id": r.provider_id, + "output": openai_tool_text_content(&r.content), + })); + let image_content = responses_image_user_content(&r.content); + if !image_content.is_empty() { + input.push(json!({ "role": "user", "content": image_content })); + } + } + } + } + + let tools_json: Vec = tools + .iter() + .map(|t| { + json!({ + "type": "function", + "name": t.name, + "description": t.description, + "parameters": t.input_schema, + }) + }) + .collect(); + + let mut body = json!({ + "model": cfg.model, + "instructions": cfg.system_prompt, + "max_output_tokens": cfg.max_output_tokens, + "input": input, + }); + if !tools_json.is_empty() { + body["tools"] = Value::Array(tools_json); + body["tool_choice"] = json!("auto"); + } + body +} + +fn responses_image_user_content(content: &[ToolResultContent]) -> Vec { + content + .iter() + .filter_map(|c| match c { + ToolResultContent::Image { data, mime_type } => Some(json!({ + "type": "input_image", + "image_url": format!("data:{mime_type};base64,{data}"), + })), + ToolResultContent::Text(_) => None, + }) + .collect() +} + +fn parse_responses(v: Value) -> Result { + let mut text = String::new(); + let mut tool_calls = Vec::new(); + let mut saw_function_call = false; + + if let Some(items) = v.get("output").and_then(Value::as_array) { + for item in items { + match item.get("type").and_then(Value::as_str) { + Some("message") => { + if let Some(parts) = item.get("content").and_then(Value::as_array) { + for p in parts { + // Responses uses "output_text" for assistant text. + // Also accept "text" as a forward-compat fallback. + if matches!( + p.get("type").and_then(Value::as_str), + Some("output_text" | "text") + ) { + if let Some(t) = p.get("text").and_then(Value::as_str) { + text.push_str(t); + } + } + } + } + } + Some("function_call") => { + saw_function_call = true; + let raw = item + .get("arguments") + .and_then(Value::as_str) + .unwrap_or("{}"); + let args: Value = serde_json::from_str(raw).map_err(|e| { + AgentError::Llm(format!("function_call.arguments not valid JSON: {e}")) + })?; + tool_calls.push(make_tool_call( + str_field(item, "call_id"), + str_field(item, "name"), + args, + )?); + } + // Reasoning items are model-internal. Sprout's flow is + // stateless across turns and we have no use for the (opaque) + // reasoning summary, so we skip them. + Some("reasoning") => {} + // Unknown item types are ignored for forward compatibility. + _ => {} + } + } + } + + let stop = responses_stop(&v, saw_function_call); + Ok(LlmResponse { + text, + tool_calls, + stop, + }) +} + +/// Map a Responses API result to our `ProviderStop`. +/// +/// Status `incomplete` with `reason == "max_output_tokens"` → `MaxTokens`. +/// Status `completed` with one or more `function_call` items → `ToolUse`. +/// Status `completed` otherwise → `EndTurn`. Anything else (`failed`, +/// `cancelled`, …) → `Other`. +fn responses_stop(v: &Value, saw_function_call: bool) -> ProviderStop { + match v.get("status").and_then(Value::as_str) { + Some("incomplete") => { + let reason = v + .get("incomplete_details") + .and_then(|d| d.get("reason")) + .and_then(Value::as_str); + if matches!(reason, Some("max_output_tokens")) { + ProviderStop::MaxTokens + } else { + ProviderStop::Other + } + } + Some("completed") => { + if saw_function_call { + ProviderStop::ToolUse + } else { + ProviderStop::EndTurn + } + } + _ => ProviderStop::Other, + } +} + fn map_stop(s: Option<&str>) -> ProviderStop { match s { Some("end_turn" | "stop") => ProviderStop::EndTurn, @@ -442,7 +655,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::config::{Config, HookServers, Provider}; + use crate::config::{Config, HookServers, OpenAiApi, Provider}; use crate::types::{HistoryItem, ToolCall, ToolResult, ToolResultContent}; use std::time::Duration; @@ -470,6 +683,7 @@ mod tests { model: "model".into(), base_url: "http://example.invalid".into(), anthropic_api_version: "2023-06-01".into(), + openai_api: OpenAiApi::ChatCompletions, } } @@ -509,6 +723,203 @@ mod tests { assert_eq!(content[1]["source"]["data"], "aW1n"); } + // ── Responses API unit tests ─────────────────────────────────────── + + fn cfg_responses() -> Config { + let mut c = cfg(Provider::OpenAi); + c.openai_api = OpenAiApi::Responses; + c + } + + fn tool_call_history() -> Vec { + vec![ + HistoryItem::User("call the tool".into()), + HistoryItem::Assistant { + text: "ok, calling".into(), + tool_calls: vec![ToolCall { + provider_id: "call_abc".into(), + name: "dev__shell".into(), + arguments: serde_json::json!({"command": "ls"}), + }], + }, + HistoryItem::ToolResult(ToolResult { + provider_id: "call_abc".into(), + content: vec![ToolResultContent::Text("file.txt".into())], + is_error: false, + }), + ] + } + + #[test] + fn responses_body_top_level_shape() { + let tools = vec![ToolDef { + name: "dev__shell".into(), + description: "run a shell command".into(), + input_schema: serde_json::json!({ + "type": "object", + "properties": {"command": {"type": "string"}}, + }), + }]; + let body = responses_body(&cfg_responses(), &[HistoryItem::User("hi".into())], &tools); + assert_eq!(body["model"], "model"); + assert_eq!(body["instructions"], "system"); + assert_eq!(body["max_output_tokens"], 1024); + assert!( + body.get("messages").is_none(), + "must use `input`, not `messages`" + ); + assert!(body.get("max_tokens").is_none()); + assert!(body.get("max_completion_tokens").is_none()); + + // Tools are flat — top-level type/name/description/parameters. + let tool = &body["tools"][0]; + assert_eq!(tool["type"], "function"); + assert_eq!(tool["name"], "dev__shell"); + assert!( + tool.get("function").is_none(), + "Responses tool schema is flat" + ); + assert_eq!(body["tool_choice"], "auto"); + } + + #[test] + fn responses_body_replay_emits_function_call_before_output() { + // Replay requirement from the live API: the assistant's prior + // function_call item *must* appear in `input[]` before its matching + // function_call_output, otherwise the API rejects with + // "No tool call found for call_id ...". + let body = responses_body(&cfg_responses(), &tool_call_history(), &[]); + let input = body["input"].as_array().unwrap(); + + // [0] user, [1] assistant text, [2] function_call, [3] function_call_output + assert_eq!(input[0]["role"], "user"); + assert_eq!(input[0]["content"][0]["type"], "input_text"); + assert_eq!(input[0]["content"][0]["text"], "call the tool"); + + assert_eq!(input[1]["role"], "assistant"); + assert_eq!(input[1]["content"][0]["type"], "output_text"); + assert_eq!(input[1]["content"][0]["text"], "ok, calling"); + + assert_eq!(input[2]["type"], "function_call"); + assert_eq!(input[2]["call_id"], "call_abc"); + assert_eq!(input[2]["name"], "dev__shell"); + // Arguments are a JSON-encoded string per spec. + assert_eq!(input[2]["arguments"], "{\"command\":\"ls\"}"); + + assert_eq!(input[3]["type"], "function_call_output"); + assert_eq!(input[3]["call_id"], "call_abc"); + assert_eq!(input[3]["output"], "file.txt"); + } + + #[test] + fn responses_body_skips_empty_assistant_text() { + // Mirrors the Chat Completions behavior (#559/#560): empty assistant + // turns are skipped so we don't emit an empty `output_text` block, + // but the tool_call(s) on that assistant turn still go through. + let history = vec![ + HistoryItem::User("u".into()), + HistoryItem::Assistant { + text: String::new(), + tool_calls: vec![ToolCall { + provider_id: "call_x".into(), + name: "t".into(), + arguments: serde_json::json!({}), + }], + }, + ]; + let body = responses_body(&cfg_responses(), &history, &[]); + let input = body["input"].as_array().unwrap(); + assert_eq!(input.len(), 2); + assert_eq!(input[0]["role"], "user"); + assert_eq!(input[1]["type"], "function_call"); + } + + #[test] + fn responses_body_image_tool_result_attaches_input_image() { + let body = responses_body(&cfg_responses(), &image_history(), &[]); + let input = body["input"].as_array().unwrap(); + // function_call_output carries the text part; image rides on a + // trailing user message as `input_image`. + let fco = input + .iter() + .find(|i| i["type"] == "function_call_output") + .unwrap(); + assert_eq!(fco["call_id"], "toolu_1"); + let img_msg = input.iter().rev().find(|i| i["role"] == "user").unwrap(); + assert_eq!(img_msg["content"][0]["type"], "input_image"); + assert_eq!( + img_msg["content"][0]["image_url"], + "data:image/png;base64,aW1n" + ); + } + + #[test] + fn parse_responses_completed_with_text_is_end_turn() { + let v = serde_json::json!({ + "status": "completed", + "output": [{ + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": "hello"}], + }], + }); + let r = parse_responses(v).unwrap(); + assert_eq!(r.text, "hello"); + assert!(r.tool_calls.is_empty()); + assert_eq!(r.stop, ProviderStop::EndTurn); + } + + #[test] + fn parse_responses_completed_with_function_call_is_tool_use() { + let v = serde_json::json!({ + "status": "completed", + "output": [ + {"type": "reasoning", "id": "rs_1", "summary": []}, + { + "type": "function_call", + "call_id": "call_z", + "name": "dev__shell", + "arguments": "{\"command\":\"ls\"}", + }, + ], + }); + let r = parse_responses(v).unwrap(); + assert_eq!(r.text, ""); + assert_eq!(r.tool_calls.len(), 1); + assert_eq!(r.tool_calls[0].provider_id, "call_z"); + assert_eq!(r.tool_calls[0].name, "dev__shell"); + assert_eq!( + r.tool_calls[0].arguments, + serde_json::json!({"command": "ls"}) + ); + assert_eq!(r.stop, ProviderStop::ToolUse); + } + + #[test] + fn parse_responses_incomplete_max_output_tokens() { + let v = serde_json::json!({ + "status": "incomplete", + "incomplete_details": {"reason": "max_output_tokens"}, + "output": [], + }); + let r = parse_responses(v).unwrap(); + assert_eq!(r.stop, ProviderStop::MaxTokens); + } + + #[test] + fn parse_responses_rejects_malformed_function_arguments() { + let v = serde_json::json!({ + "status": "completed", + "output": [{ + "type": "function_call", + "call_id": "call_z", + "name": "t", + "arguments": "not json {", + }], + }); + assert!(matches!(parse_responses(v), Err(AgentError::Llm(_)))); + } + #[test] fn openai_tool_result_adds_followup_image_user_message() { let body = openai_body(&cfg(Provider::OpenAi), &image_history(), &[]); From d17d9bba0fbf176ac5e34195f3a3bdbfc12c12fc Mon Sep 17 00:00:00 2001 From: Tyler Longwell <109685178+tlongwell-block@users.noreply.github.com> Date: Sat, 16 May 2026 20:22:52 -0400 Subject: [PATCH 2/3] =?UTF-8?q?fix(agent):=20scope=20OPENAI=5FCOMPAT=5FAPI?= =?UTF-8?q?=20parsing=20+=20add=20one-shot=20chat=E2=86=92responses=20auto?= =?UTF-8?q?-upgrade?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups from review on #604. 1. Anthropic startup hardening (Max #1) `OPENAI_COMPAT_API` was parsed unconditionally, so a stray bad value in an Anthropic-only env broke startup. Parse it only inside the `Provider::OpenAi` arm of `Config::from_env`. Anthropic gets a placeholder `OpenAiApi::ChatCompletions` it never reads. New tests pin the parser behavior without touching process env. 2. One-shot chat→responses auto-upgrade (Max #2, Tyler "automatic detection/fallthrough") When `OPENAI_COMPAT_API=auto` and the provider replies to a Chat Completions request with a body that explicitly names `/v1/responses` (or the prose "use the Responses API"), latch a process-wide sticky-cached upgrade and re-issue the same request on `/v1/responses`. Subsequent calls skip the chat attempt entirely. Pinned values (`OPENAI_COMPAT_API=chat`|`responses`) never auto-upgrade. Signal matcher (`is_responses_required_error`) is intentionally narrow — only matches the literal path `/v1/responses` or specific prose phrases, so we don't get fooled by unrelated 4xx bodies. New `Config.openai_api_auto: bool` records whether the operator resolved-by-auto vs. pinned, so we know when to enable the upgrade. `Llm` gains an `AtomicBool` for the sticky upgrade, plus three small helpers (`effective_openai_api`, `should_try_auto_upgrade`, `latch_responses_upgrade`) so the dispatch reads straight through. Logged at WARN once per process: `"openai chat-completions endpoint reported that this model requires the Responses API; auto-upgrading subsequent OpenAI calls to /v1/responses for the rest of this process"`, with the provider error body attached. Tests: - 4 new unit tests for `is_responses_required_error` covering the Databricks GPT-5.5 signal, OpenAI prose phrasing, and explicit non-matches for `invalid_api_key`, generic `unsupported_parameter`, and empty body. - 3 new unit tests for `parse_openai_api` covering unset-defaults-to-auto, case-insensitive explicit values with whitespace, and rejected garbage. - New integration test `tests/openai_auto_upgrade.rs` spawns a fake provider that 400s on `/chat/completions` with the Databricks signal and 200s on `/responses`. Drives sprout-agent through ACP and asserts `stopReason=end_turn` plus chat-hit-once / responses-hit-once. 65 tests pass, 0 fail. clippy `-D warnings` clean. cargo fmt clean. Live smoke against api.openai.com with gpt-5-mini still 3/3 PASS. Signed-off-by: Tyler Longwell <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-agent/src/config.rs | 97 ++++++- crates/sprout-agent/src/llm.rs | 264 +++++++++++++++--- .../sprout-agent/tests/openai_auto_upgrade.rs | 229 +++++++++++++++ 3 files changed, 538 insertions(+), 52 deletions(-) create mode 100644 crates/sprout-agent/tests/openai_auto_upgrade.rs diff --git a/crates/sprout-agent/src/config.rs b/crates/sprout-agent/src/config.rs index 08827694..fca84d6d 100644 --- a/crates/sprout-agent/src/config.rs +++ b/crates/sprout-agent/src/config.rs @@ -73,6 +73,13 @@ pub struct Config { /// Anthropic`. Set via `OPENAI_COMPAT_API`; defaults to auto-select /// based on `base_url`. pub openai_api: OpenAiApi, + /// `true` when `openai_api` was resolved by auto-detection (i.e. + /// `OPENAI_COMPAT_API` was unset or `auto`). When `true` and the + /// resolved value is `ChatCompletions`, `Llm` may upgrade to + /// `Responses` once after observing a specific "use /v1/responses" + /// error from the provider. `false` means the operator pinned the + /// choice and the auto-upgrade path is disabled. + pub openai_api_auto: bool, } impl Config { @@ -82,19 +89,43 @@ impl Config { "openai" | "openai-compat" => Provider::OpenAi, o => return Err(format!("config: SPROUT_AGENT_PROVIDER={o} not supported")), }; - let (api_key, model, base_url) = match provider { + let (api_key, model, base_url, openai_api, openai_api_auto) = match provider { Provider::Anthropic => ( req("ANTHROPIC_API_KEY")?, req("ANTHROPIC_MODEL")?, env_or("ANTHROPIC_BASE_URL", "https://api.anthropic.com"), + // Unused for Anthropic. A placeholder value keeps `Config` + // sized and avoids `Option`/`unwrap` plumbing in the hot + // path. We intentionally do NOT parse `OPENAI_COMPAT_API` + // here — a stray invalid value in an Anthropic-only env + // must not break startup. + OpenAiApi::ChatCompletions, + false, ), - Provider::OpenAi => ( - req("OPENAI_COMPAT_API_KEY")?, - req("OPENAI_COMPAT_MODEL")?, - env_or("OPENAI_COMPAT_BASE_URL", "https://api.openai.com/v1"), - ), + Provider::OpenAi => { + let base_url = env_or("OPENAI_COMPAT_BASE_URL", "https://api.openai.com/v1"); + let raw = env("OPENAI_COMPAT_API"); + // "auto" (or unset/empty) means we may upgrade chat→responses + // on a specific error from the provider. An explicit value + // pins the choice. + let openai_api_auto = matches!( + raw.as_deref() + .unwrap_or("auto") + .trim() + .to_ascii_lowercase() + .as_str(), + "auto" | "" + ); + let openai_api = parse_openai_api(raw.as_deref(), &base_url)?; + ( + req("OPENAI_COMPAT_API_KEY")?, + req("OPENAI_COMPAT_MODEL")?, + base_url, + openai_api, + openai_api_auto, + ) + } }; - let openai_api = parse_openai_api_env(&base_url)?; let system_prompt = match (env("SPROUT_AGENT_SYSTEM_PROMPT"), env("SPROUT_AGENT_SYSTEM_PROMPT_FILE")) { (Some(_), Some(_)) => return Err( "config: SPROUT_AGENT_SYSTEM_PROMPT and SPROUT_AGENT_SYSTEM_PROMPT_FILE are mutually exclusive".into()), @@ -110,6 +141,7 @@ impl Config { base_url, anthropic_api_version: env_or("ANTHROPIC_API_VERSION", "2023-06-01"), openai_api, + openai_api_auto, max_rounds: parse_env("SPROUT_AGENT_MAX_ROUNDS", 0)?, max_output_tokens: parse_env("SPROUT_AGENT_MAX_OUTPUT_TOKENS", 32_768)?, llm_timeout: Duration::from_secs(parse_env("SPROUT_AGENT_LLM_TIMEOUT_SECS", 120)?), @@ -203,9 +235,15 @@ fn req(k: &str) -> Result { /// Parse `OPENAI_COMPAT_API` and, on `auto` (or unset), pick a default /// from `base_url`. Official OpenAI gets Responses; everything else gets /// Chat Completions. -fn parse_openai_api_env(base_url: &str) -> Result { - let raw = env("OPENAI_COMPAT_API").unwrap_or_else(|| "auto".into()); - match raw.to_ascii_lowercase().as_str() { +/// +/// Only called when `provider = OpenAi`. We deliberately don't read this +/// env when `provider = Anthropic` so a stray invalid value cannot break +/// an Anthropic-only deployment. +/// +/// The pure parser takes `raw` as a parameter so tests can drive the +/// full input matrix without touching process env. +fn parse_openai_api(raw: Option<&str>, base_url: &str) -> Result { + match raw.unwrap_or("auto").trim().to_ascii_lowercase().as_str() { "chat" | "chat_completions" | "chat-completions" => Ok(OpenAiApi::ChatCompletions), "responses" => Ok(OpenAiApi::Responses), "auto" | "" => Ok(auto_openai_api(base_url)), @@ -431,6 +469,45 @@ mod tests { } } + #[test] + fn parse_openai_api_unset_defaults_to_auto() { + assert_eq!( + parse_openai_api(None, "https://api.openai.com/v1").unwrap(), + OpenAiApi::Responses, + ); + assert_eq!( + parse_openai_api(None, "http://localhost:11434/v1").unwrap(), + OpenAiApi::ChatCompletions, + ); + } + + #[test] + fn parse_openai_api_accepts_explicit_values() { + let url = "http://example.invalid"; + assert_eq!( + parse_openai_api(Some("chat"), url).unwrap(), + OpenAiApi::ChatCompletions + ); + assert_eq!( + parse_openai_api(Some("chat-completions"), url).unwrap(), + OpenAiApi::ChatCompletions + ); + assert_eq!( + parse_openai_api(Some("RESPONSES"), url).unwrap(), + OpenAiApi::Responses + ); + assert_eq!( + parse_openai_api(Some(" auto "), "https://api.openai.com").unwrap(), + OpenAiApi::Responses + ); + } + + #[test] + fn parse_openai_api_rejects_garbage() { + let err = parse_openai_api(Some("nope"), "http://example.invalid").unwrap_err(); + assert!(err.contains("OPENAI_COMPAT_API=nope")); + } + #[test] fn auto_openai_api_does_not_match_lookalike_hosts() { // Defense against host-suffix mistakes: api.openai.com.evil.com diff --git a/crates/sprout-agent/src/llm.rs b/crates/sprout-agent/src/llm.rs index 3cb32f27..a13beccb 100644 --- a/crates/sprout-agent/src/llm.rs +++ b/crates/sprout-agent/src/llm.rs @@ -11,6 +11,13 @@ const MAX_LLM_ERROR_BODY_BYTES: usize = 4 * 1024; pub struct Llm { http: Client, + /// One-shot sticky flag: once we observe a "use /v1/responses" / + /// "Responses API required" error from a provider while + /// `cfg.openai_api_auto` is set, we flip this to `true` and route + /// every subsequent OpenAI request to the Responses endpoint for + /// the lifetime of the process. The agent loop will then retry the + /// failed turn, which succeeds the second time around. + auto_upgraded_to_responses: std::sync::atomic::AtomicBool, } impl Llm { @@ -20,7 +27,42 @@ impl Llm { .timeout(cfg.llm_timeout) .build() .map_err(|e| AgentError::Llm(format!("http: {e}")))?; - Ok(Self { http }) + Ok(Self { + http, + auto_upgraded_to_responses: std::sync::atomic::AtomicBool::new(false), + }) + } + + /// Effective OpenAI API for this call, accounting for one-shot + /// auto-upgrade. `Responses` if the operator pinned it OR if we've + /// already upgraded; otherwise `ChatCompletions`. + fn effective_openai_api(&self, cfg: &Config) -> OpenAiApi { + if cfg.openai_api == OpenAiApi::Responses + || self + .auto_upgraded_to_responses + .load(std::sync::atomic::Ordering::Relaxed) + { + OpenAiApi::Responses + } else { + OpenAiApi::ChatCompletions + } + } + + /// Should we look for a "use Responses API" hint in this error body? + /// Only when the operator left `OPENAI_COMPAT_API=auto` AND we haven't + /// already upgraded. + fn should_try_auto_upgrade(&self, cfg: &Config) -> bool { + cfg.openai_api_auto + && !self + .auto_upgraded_to_responses + .load(std::sync::atomic::Ordering::Relaxed) + } + + /// Latch the upgrade. Returns the previous value so the caller can log + /// once per process. + fn latch_responses_upgrade(&self) -> bool { + self.auto_upgraded_to_responses + .swap(true, std::sync::atomic::Ordering::Relaxed) } pub async fn complete( @@ -40,21 +82,85 @@ impl Llm { .await?; parse_anthropic(v) } - Provider::OpenAi => match cfg.openai_api { - OpenAiApi::ChatCompletions => { - let body = openai_body(cfg, history, tools); - let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - parse_openai(v) - } - OpenAiApi::Responses => { - let body = responses_body(cfg, history, tools); - let url = format!("{}/responses", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - parse_responses(v) - } - }, + Provider::OpenAi => self.openai_complete(cfg, history, tools).await, + } + } + + /// OpenAI dispatch with one-shot auto-upgrade. When `cfg.openai_api_auto` + /// is set and a Chat Completions request comes back with a "use the + /// Responses API" signal from the provider, we latch a process-wide + /// upgrade and re-issue the call against `/v1/responses`. Subsequent + /// calls skip the chat attempt entirely. + async fn openai_complete( + &self, + cfg: &Config, + history: &[HistoryItem], + tools: &[ToolDef], + ) -> Result { + if self.effective_openai_api(cfg) == OpenAiApi::Responses { + return self.openai_responses(cfg, history, tools).await; + } + // Chat Completions, with possible upgrade-then-retry. + match self.openai_chat(cfg, history, tools).await { + Ok(r) => Ok(r), + Err(e) if self.try_upgrade_on_error(cfg, &e) => { + // Latched. Re-issue the same request on Responses. + self.openai_responses(cfg, history, tools).await + } + Err(e) => Err(e), + } + } + + async fn openai_chat( + &self, + cfg: &Config, + history: &[HistoryItem], + tools: &[ToolDef], + ) -> Result { + let body = openai_body(cfg, history, tools); + let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); + let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; + parse_openai(v) + } + + async fn openai_responses( + &self, + cfg: &Config, + history: &[HistoryItem], + tools: &[ToolDef], + ) -> Result { + let body = responses_body(cfg, history, tools); + let url = format!("{}/responses", cfg.base_url.trim_end_matches('/')); + let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; + parse_responses(v) + } + + /// Inspect a failed OpenAI Chat Completions error. If we're in `auto` + /// mode and the provider message asks us to use the Responses API, + /// latch the upgrade so subsequent requests go there. Returns `true` + /// iff we just upgraded (caller should re-issue once). + fn try_upgrade_on_error(&self, cfg: &Config, err: &AgentError) -> bool { + if !self.should_try_auto_upgrade(cfg) { + return false; + } + let body = match err { + AgentError::Llm(s) => s.as_str(), + // Auth errors are not "use the other endpoint" errors. + _ => return false, + }; + if !is_responses_required_error(body) { + return false; } + let already = self.latch_responses_upgrade(); + if !already { + tracing::warn!( + provider_message = body, + "openai chat-completions endpoint reported that this model requires \ + the Responses API; auto-upgrading subsequent OpenAI calls to /v1/responses \ + for the rest of this process" + ); + } + true } pub async fn summarize( @@ -83,33 +189,53 @@ impl Llm { .await?; Ok(parse_anthropic(v)?.text) } - Provider::OpenAi => match cfg.openai_api { - OpenAiApi::ChatCompletions => { - let body = json!({ - "model": cfg.model, - "stream": false, - "max_completion_tokens": max_output_tokens, - "messages": [ - { "role": "system", "content": system_prompt }, - { "role": "user", "content": user_prompt }, - ], - }); - let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - Ok(parse_openai(v)?.text) - } - OpenAiApi::Responses => { - let body = json!({ - "model": cfg.model, - "max_output_tokens": max_output_tokens, - "instructions": system_prompt, - "input": user_prompt, - }); - let url = format!("{}/responses", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - Ok(parse_responses(v)?.text) - } - }, + Provider::OpenAi => { + self.openai_summarize(cfg, system_prompt, user_prompt, max_output_tokens) + .await + } + } + } + + async fn openai_summarize( + &self, + cfg: &Config, + system_prompt: &str, + user_prompt: &str, + max_output_tokens: u32, + ) -> Result { + let chat = || async { + let body = json!({ + "model": cfg.model, + "stream": false, + "max_completion_tokens": max_output_tokens, + "messages": [ + { "role": "system", "content": system_prompt }, + { "role": "user", "content": user_prompt }, + ], + }); + let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); + let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; + Ok::<_, AgentError>(parse_openai(v)?.text) + }; + let responses = || async { + let body = json!({ + "model": cfg.model, + "max_output_tokens": max_output_tokens, + "instructions": system_prompt, + "input": user_prompt, + }); + let url = format!("{}/responses", cfg.base_url.trim_end_matches('/')); + let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; + Ok::<_, AgentError>(parse_responses(v)?.text) + }; + + if self.effective_openai_api(cfg) == OpenAiApi::Responses { + return responses().await; + } + match chat().await { + Ok(t) => Ok(t), + Err(e) if self.try_upgrade_on_error(cfg, &e) => responses().await, + Err(e) => Err(e), } } } @@ -354,6 +480,25 @@ fn responses_body(cfg: &Config, history: &[HistoryItem], tools: &[ToolDef]) -> V body } +/// Detect an "endpoint mismatch" signal in a provider error body. The +/// match is intentionally narrow — we only act on phrases that explicitly +/// name `/v1/responses` or the Responses API, so we don't get fooled by +/// generic 4xx bodies that happen to mention those terms. +/// +/// Known signals: the literal string `/v1/responses` (e.g. the Databricks +/// GPT-5.5 message: "Function tools with reasoning_effort are not supported +/// for gpt-5.5 in /v1/chat/completions. Please use /v1/responses instead."); +/// or the prose phrases "use the Responses API" / "Responses API instead" +/// as a forward-compat slot for OpenAI proper saying the same thing without +/// the URL. +fn is_responses_required_error(body: &str) -> bool { + // Lower-case once; the body is already capped at MAX_LLM_ERROR_BODY_BYTES. + let b = body.to_ascii_lowercase(); + b.contains("/v1/responses") + || b.contains("responses api instead") + || b.contains("use the responses api") +} + fn responses_image_user_content(content: &[ToolResultContent]) -> Vec { content .iter() @@ -684,6 +829,7 @@ mod tests { base_url: "http://example.invalid".into(), anthropic_api_version: "2023-06-01".into(), openai_api: OpenAiApi::ChatCompletions, + openai_api_auto: false, } } @@ -906,6 +1052,40 @@ mod tests { assert_eq!(r.stop, ProviderStop::MaxTokens); } + #[test] + fn is_responses_required_error_matches_databricks_signal() { + // The exact body shape we saw from Databricks (paraphrased; the + // signal is the URL path mention). + let body = "{\"error_code\":\"BAD_REQUEST\",\"message\":\ + \"Function tools with reasoning_effort are not supported \ + for gpt-5.5 in /v1/chat/completions. Please use \ + /v1/responses instead.\"}"; + assert!(is_responses_required_error(body)); + } + + #[test] + fn is_responses_required_error_matches_openai_prose() { + // Forward-compat slot for OpenAI proper phrasing this without + // the URL. + assert!(is_responses_required_error( + "This model requires the Responses API. Please use the Responses API instead." + )); + assert!(is_responses_required_error( + "ERROR: use the Responses API for this model." + )); + } + + #[test] + fn is_responses_required_error_ignores_unrelated_4xx() { + assert!(!is_responses_required_error( + "{\"error\":\"invalid_api_key\",\"message\":\"Incorrect API key provided\"}" + )); + assert!(!is_responses_required_error( + "{\"error\":\"unsupported_parameter\",\"message\":\"max_tokens is not supported with this model\"}" + )); + assert!(!is_responses_required_error("")); + } + #[test] fn parse_responses_rejects_malformed_function_arguments() { let v = serde_json::json!({ diff --git a/crates/sprout-agent/tests/openai_auto_upgrade.rs b/crates/sprout-agent/tests/openai_auto_upgrade.rs new file mode 100644 index 00000000..69f55917 --- /dev/null +++ b/crates/sprout-agent/tests/openai_auto_upgrade.rs @@ -0,0 +1,229 @@ +//! Integration test for OpenAI auto-upgrade chat→responses. +//! +//! Starts a tiny HTTP server that: +//! 1. accepts a POST to /chat/completions, replies 400 with a body that +//! mentions `/v1/responses` (mirrors the Databricks GPT-5.5 signal); +//! 2. accepts a POST to /responses, replies 200 with a Responses-shaped +//! JSON envelope. +//! +//! Spawns `sprout-agent` with `provider=openai` + `OPENAI_COMPAT_API=auto` +//! pointed at the fake server, drives one prompt through the ACP wire +//! protocol, and verifies the prompt completes with `stopReason=end_turn` +//! — which can only happen if the second (Responses) request succeeded. + +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::process::Stdio; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +use serde_json::json; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use tokio::process::Command; +use tokio::time::timeout; + +/// Spawns a single-shot fake provider. Returns the base URL (e.g. +/// `http://127.0.0.1:54321`). The server stays up for the lifetime of +/// the process — we don't need to clean it up explicitly. +fn spawn_fake_provider() -> (String, Arc, Arc) { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + listener.set_nonblocking(false).unwrap(); + let url = format!("http://{}", listener.local_addr().unwrap()); + let chat_hits = Arc::new(AtomicUsize::new(0)); + let responses_hits = Arc::new(AtomicUsize::new(0)); + let chat = chat_hits.clone(); + let resp = responses_hits.clone(); + + std::thread::spawn(move || { + loop { + let (mut sock, _) = match listener.accept() { + Ok(p) => p, + Err(_) => return, + }; + let chat = chat.clone(); + let resp = resp.clone(); + std::thread::spawn(move || { + sock.set_read_timeout(Some(Duration::from_secs(5))).ok(); + // Read request head + body. Naive: read until we have the + // request line + headers, then read Content-Length bytes. + let mut buf = Vec::with_capacity(4096); + let mut tmp = [0u8; 4096]; + loop { + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + match sock.read(&mut tmp) { + Ok(0) | Err(_) => return, + Ok(n) => buf.extend_from_slice(&tmp[..n]), + } + if buf.len() > 256 * 1024 { + return; + } + } + let head_end = buf.windows(4).position(|w| w == b"\r\n\r\n").unwrap() + 4; + let head = String::from_utf8_lossy(&buf[..head_end]).to_string(); + // Drain the body to satisfy keep-alive; we don't actually + // need it. + let cl = head + .lines() + .find_map(|l| { + l.strip_prefix("content-length:") + .or_else(|| l.strip_prefix("Content-Length:")) + }) + .and_then(|s| s.trim().parse::().ok()) + .unwrap_or(0); + while buf.len() < head_end + cl { + match sock.read(&mut tmp) { + Ok(0) | Err(_) => break, + Ok(n) => buf.extend_from_slice(&tmp[..n]), + } + } + + let (status, body) = if head.contains("POST /chat/completions") { + chat.fetch_add(1, Ordering::SeqCst); + let body = json!({ + "error": { + "code": "BAD_REQUEST", + "message": "Function tools with reasoning_effort are not supported for gpt-5.5 in /v1/chat/completions. Please use /v1/responses instead." + } + }) + .to_string(); + (400u16, body) + } else if head.contains("POST /responses") { + resp.fetch_add(1, Ordering::SeqCst); + let body = json!({ + "status": "completed", + "output": [{ + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": "ok from responses"}] + }] + }) + .to_string(); + (200u16, body) + } else { + (404u16, "{}".to_string()) + }; + let reason = match status { + 200 => "OK", + 400 => "Bad Request", + _ => "Not Found", + }; + let resp_text = format!( + "HTTP/1.1 {status} {reason}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + body.len(), body + ); + let _ = sock.write_all(resp_text.as_bytes()); + let _ = sock.shutdown(std::net::Shutdown::Write); + }); + } + }); + + (url, chat_hits, responses_hits) +} + +#[tokio::test] +async fn openai_auto_upgrades_chat_to_responses_on_databricks_signal() { + let (base_url, chat_hits, resp_hits) = spawn_fake_provider(); + + let bin = env!("CARGO_BIN_EXE_sprout-agent"); + let mut cmd = Command::new(bin); + cmd.env("SPROUT_AGENT_PROVIDER", "openai") + .env("OPENAI_COMPAT_API_KEY", "test") + .env("OPENAI_COMPAT_MODEL", "gpt-5.5") + .env("OPENAI_COMPAT_BASE_URL", &base_url) + // No OPENAI_COMPAT_API — must default to "auto" so the upgrade + // path is enabled. + .env_remove("OPENAI_COMPAT_API") + .env("SPROUT_AGENT_LLM_TIMEOUT_SECS", "5") + .env("SPROUT_AGENT_MAX_ROUNDS", "4") + .env("SPROUT_AGENT_MCP_INIT_TIMEOUT_SECS", "2") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .kill_on_drop(true); + + let mut child = cmd.spawn().expect("spawn sprout-agent"); + let mut stdin = child.stdin.take().unwrap(); + let mut stdout = BufReader::new(child.stdout.take().unwrap()); + + async fn send(stdin: &mut tokio::process::ChildStdin, v: serde_json::Value) { + let line = format!("{v}\n"); + stdin.write_all(line.as_bytes()).await.unwrap(); + stdin.flush().await.unwrap(); + } + async fn recv(stdout: &mut BufReader) -> serde_json::Value { + let mut line = String::new(); + timeout(Duration::from_secs(8), stdout.read_line(&mut line)) + .await + .expect("recv timed out") + .expect("recv io"); + serde_json::from_str(&line).expect("recv json") + } + + send( + &mut stdin, + json!({ + "jsonrpc": "2.0", "id": 1, "method": "initialize", + "params": {"protocolVersion": 1, "clientCapabilities": {}, + "clientInfo": {"name": "auto-upgrade-test"}} + }), + ) + .await; + let init = recv(&mut stdout).await; + assert!(init.get("result").is_some(), "initialize: {init}"); + + let cwd = std::env::current_dir().unwrap(); + send( + &mut stdin, + json!({ + "jsonrpc": "2.0", "id": 2, "method": "session/new", + "params": {"cwd": cwd.to_string_lossy(), "mcpServers": []} + }), + ) + .await; + let sess = recv(&mut stdout).await; + let sid = sess["result"]["sessionId"] + .as_str() + .unwrap_or_else(|| panic!("session/new failed: {sess}")) + .to_string(); + + send( + &mut stdin, + json!({ + "jsonrpc": "2.0", "id": 3, "method": "session/prompt", + "params": {"sessionId": sid, + "prompt": [{"type": "text", "text": "hi"}]} + }), + ) + .await; + + // Drain notifications until we see the response for id=3. + let mut stop_reason: Option = None; + for _ in 0..40 { + let msg = recv(&mut stdout).await; + if msg.get("id") == Some(&json!(3)) { + if let Some(r) = msg.get("result") { + stop_reason = r + .get("stopReason") + .and_then(|v| v.as_str()) + .map(String::from); + } + break; + } + } + assert_eq!(stop_reason.as_deref(), Some("end_turn")); + assert_eq!( + chat_hits.load(Ordering::SeqCst), + 1, + "must have tried chat first" + ); + assert!( + resp_hits.load(Ordering::SeqCst) >= 1, + "must have upgraded to responses" + ); + + drop(stdin); + let _ = child.wait().await; +} From d2d06ce3fc24f519028884f0d5e3f9b6d357b352 Mon Sep 17 00:00:00 2001 From: Tyler Longwell <109685178+tlongwell-block@users.noreply.github.com> Date: Sat, 16 May 2026 21:04:36 -0400 Subject: [PATCH 3/3] =?UTF-8?q?refactor(agent):=20minimize=20OpenAI=20Resp?= =?UTF-8?q?onses=20prod=20LOC=20(~455=20=E2=86=92=20~313)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review pass per Max's PR comments. Same behavior, same core safety tests, fewer production lines. Cuts: - Collapsed `openai_api: OpenAiApi` + `openai_api_auto: bool` into a single tri-state enum `OpenAiApi::{Chat,Responses,Auto}`. The auto- upgrade-on-error path now keys on `cfg.openai_api == Auto` directly instead of a parallel flag. - Replaced the duplicated chat/responses dispatch across `complete` and `summarize` with a single `openai_request` helper. Callers pass a `FnMut(bool) -> (Value, OpenAiParse)` so the body is only built for the endpoint actually selected. - Pre-resolved `OpenAiApi::Auto`'s host check is gone — the endpoint is computed at call time inside `openai_request`. Drops the `auto_openai_api` helper; the test surface is now a single pure `is_openai_host` function in `config.rs`. - Inlined `responses_image_user_content` (single caller). - Inlined `responses_stop` into `parse_responses` (single caller). - Removed wall-of-text protocol comments; kept only the non-obvious replay-ordering invariant and a spec link. - Collapsed test fan-out: `auto_openai_api_*` (3 fns) → `is_openai_host_matrix` (1 table); `parse_openai_api_*` (3 fns) → `parse_openai_api_values` (1 table); `is_responses_required_error_*` (3 fns) → `_matrix` (1 table). What stayed: - Replay-ordering integration test (`responses_body_replay_emits_function_call_before_output`). - Live-fake Databricks integration test (`tests/openai_auto_upgrade.rs`). - Lookalike host safety case (`api.openai.com.evil.example` → Chat). - Narrow `is_responses_required_error` matcher. Net production diff (excluding `#[cfg(test)] mod tests` blocks): config.rs +97 → +48 llm.rs +358 → +265 total +455 → +313 59 tests pass (was 65 — 4 collapsed into 2 tables, 2 helpers removed). cargo fmt + cargo clippy -p sprout-agent --all-targets -D warnings clean. Live smoke against api.openai.com with gpt-5-mini: 3/3 PASS unchanged (plain, tool roundtrip, explicit chat-mode override). Signed-off-by: Tyler Longwell <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-agent/src/config.rs | 224 ++++--------- crates/sprout-agent/src/llm.rs | 524 ++++++++++++------------------ 2 files changed, 270 insertions(+), 478 deletions(-) diff --git a/crates/sprout-agent/src/config.rs b/crates/sprout-agent/src/config.rs index fca84d6d..3c1c48f0 100644 --- a/crates/sprout-agent/src/config.rs +++ b/crates/sprout-agent/src/config.rs @@ -28,16 +28,16 @@ pub enum Provider { OpenAi, } -/// Which OpenAI-family HTTP API to call when `provider = OpenAi`. -/// -/// `Auto` (the default) picks per `base_url`: official OpenAI gets the -/// Responses API (`/v1/responses`), everything else (vLLM, Ollama, llama.cpp, -/// OpenRouter, etc.) gets Chat Completions (`/chat/completions`). Operators -/// can pin the choice with `OPENAI_COMPAT_API={chat,responses,auto}`. +/// Which OpenAI-family HTTP API to call. Set via `OPENAI_COMPAT_API` +/// (`auto|chat|responses`); ignored when `provider = Anthropic`. `Auto` +/// picks Responses for `*.openai.com`, Chat Completions otherwise, and +/// permits a one-shot chat→responses upgrade on a "use /v1/responses" +/// provider error. #[derive(Debug, Clone, Copy, PartialEq)] pub enum OpenAiApi { - ChatCompletions, + Chat, Responses, + Auto, } #[derive(Debug, Clone)] @@ -69,17 +69,8 @@ pub struct Config { pub model: String, pub base_url: String, pub anthropic_api_version: String, - /// Which OpenAI-family HTTP API to call. Ignored when `provider = - /// Anthropic`. Set via `OPENAI_COMPAT_API`; defaults to auto-select - /// based on `base_url`. + /// OpenAI endpoint selection. See [`OpenAiApi`]. pub openai_api: OpenAiApi, - /// `true` when `openai_api` was resolved by auto-detection (i.e. - /// `OPENAI_COMPAT_API` was unset or `auto`). When `true` and the - /// resolved value is `ChatCompletions`, `Llm` may upgrade to - /// `Responses` once after observing a specific "use /v1/responses" - /// error from the provider. `false` means the operator pinned the - /// choice and the auto-upgrade path is disabled. - pub openai_api_auto: bool, } impl Config { @@ -89,42 +80,21 @@ impl Config { "openai" | "openai-compat" => Provider::OpenAi, o => return Err(format!("config: SPROUT_AGENT_PROVIDER={o} not supported")), }; - let (api_key, model, base_url, openai_api, openai_api_auto) = match provider { + // OPENAI_COMPAT_API is only read when provider=openai, so a stray + // bad value can't break an Anthropic-only deployment. + let (api_key, model, base_url, openai_api) = match provider { Provider::Anthropic => ( req("ANTHROPIC_API_KEY")?, req("ANTHROPIC_MODEL")?, env_or("ANTHROPIC_BASE_URL", "https://api.anthropic.com"), - // Unused for Anthropic. A placeholder value keeps `Config` - // sized and avoids `Option`/`unwrap` plumbing in the hot - // path. We intentionally do NOT parse `OPENAI_COMPAT_API` - // here — a stray invalid value in an Anthropic-only env - // must not break startup. - OpenAiApi::ChatCompletions, - false, + OpenAiApi::Auto, // unused for Anthropic + ), + Provider::OpenAi => ( + req("OPENAI_COMPAT_API_KEY")?, + req("OPENAI_COMPAT_MODEL")?, + env_or("OPENAI_COMPAT_BASE_URL", "https://api.openai.com/v1"), + parse_openai_api(env("OPENAI_COMPAT_API").as_deref())?, ), - Provider::OpenAi => { - let base_url = env_or("OPENAI_COMPAT_BASE_URL", "https://api.openai.com/v1"); - let raw = env("OPENAI_COMPAT_API"); - // "auto" (or unset/empty) means we may upgrade chat→responses - // on a specific error from the provider. An explicit value - // pins the choice. - let openai_api_auto = matches!( - raw.as_deref() - .unwrap_or("auto") - .trim() - .to_ascii_lowercase() - .as_str(), - "auto" | "" - ); - let openai_api = parse_openai_api(raw.as_deref(), &base_url)?; - ( - req("OPENAI_COMPAT_API_KEY")?, - req("OPENAI_COMPAT_MODEL")?, - base_url, - openai_api, - openai_api_auto, - ) - } }; let system_prompt = match (env("SPROUT_AGENT_SYSTEM_PROMPT"), env("SPROUT_AGENT_SYSTEM_PROMPT_FILE")) { (Some(_), Some(_)) => return Err( @@ -141,7 +111,6 @@ impl Config { base_url, anthropic_api_version: env_or("ANTHROPIC_API_VERSION", "2023-06-01"), openai_api, - openai_api_auto, max_rounds: parse_env("SPROUT_AGENT_MAX_ROUNDS", 0)?, max_output_tokens: parse_env("SPROUT_AGENT_MAX_OUTPUT_TOKENS", 32_768)?, llm_timeout: Duration::from_secs(parse_env("SPROUT_AGENT_LLM_TIMEOUT_SECS", 120)?), @@ -232,51 +201,33 @@ fn req(k: &str) -> Result { env(k).ok_or_else(|| format!("config: {k} required")) } -/// Parse `OPENAI_COMPAT_API` and, on `auto` (or unset), pick a default -/// from `base_url`. Official OpenAI gets Responses; everything else gets -/// Chat Completions. -/// -/// Only called when `provider = OpenAi`. We deliberately don't read this -/// env when `provider = Anthropic` so a stray invalid value cannot break -/// an Anthropic-only deployment. -/// -/// The pure parser takes `raw` as a parameter so tests can drive the -/// full input matrix without touching process env. -fn parse_openai_api(raw: Option<&str>, base_url: &str) -> Result { +/// Parse `OPENAI_COMPAT_API`. Pure (env-free) for testability; the +/// caller hands in the raw value. +fn parse_openai_api(raw: Option<&str>) -> Result { match raw.unwrap_or("auto").trim().to_ascii_lowercase().as_str() { - "chat" | "chat_completions" | "chat-completions" => Ok(OpenAiApi::ChatCompletions), + "chat" | "chat-completions" | "chat_completions" => Ok(OpenAiApi::Chat), "responses" => Ok(OpenAiApi::Responses), - "auto" | "" => Ok(auto_openai_api(base_url)), + "auto" | "" => Ok(OpenAiApi::Auto), other => Err(format!( "config: OPENAI_COMPAT_API={other} not supported (use auto|chat|responses)" )), } } -/// Auto-selection rule. Hosts on `*.openai.com` get Responses; anything -/// else (vLLM, Ollama, llama.cpp, OpenRouter, Block Gateway, …) gets -/// Chat Completions, which remains the broadly-supported wire format -/// across the OpenAI-compatible ecosystem. -fn auto_openai_api(base_url: &str) -> OpenAiApi { - if base_url_host(base_url) - .map(|h| h == "api.openai.com" || h.ends_with(".openai.com")) - .unwrap_or(false) - { - OpenAiApi::Responses - } else { - OpenAiApi::ChatCompletions - } -} - -/// Cheap host extractor for `http(s)://host[:port]/...` style URLs. -/// Returns `None` for malformed input — caller treats that as "not -/// official OpenAI" which falls back to Chat Completions. -fn base_url_host(base_url: &str) -> Option<&str> { - let rest = base_url +/// `true` when `base_url` is an official OpenAI host. Hosts on +/// `*.openai.com` get Responses under `Auto`; everything else (vLLM, +/// Ollama, OpenRouter, Block Gateway, …) gets Chat Completions. +/// Lookalike-safe: `api.openai.com.evil.example` returns `false`. +pub fn is_openai_host(base_url: &str) -> bool { + let rest = match base_url .strip_prefix("https://") - .or_else(|| base_url.strip_prefix("http://"))?; - let end = rest.find(['/', ':']).unwrap_or(rest.len()); - Some(&rest[..end]) + .or_else(|| base_url.strip_prefix("http://")) + { + Some(r) => r, + None => return false, + }; + let host = &rest[..rest.find(['/', ':']).unwrap_or(rest.len())]; + host == "api.openai.com" || host.ends_with(".openai.com") } fn parse_env(key: &str, default: T) -> Result @@ -436,85 +387,38 @@ mod tests { } #[test] - fn auto_openai_api_picks_responses_for_official_openai() { - assert_eq!( - auto_openai_api("https://api.openai.com/v1"), - OpenAiApi::Responses - ); - assert_eq!( - auto_openai_api("https://api.openai.com"), - OpenAiApi::Responses - ); - } - - #[test] - fn auto_openai_api_picks_chat_for_third_parties() { - // OpenAI-compatible servers: vLLM, Ollama, llama.cpp, OpenRouter, - // Block Gateway, Databricks, anything self-hosted. They mostly do - // not implement /v1/responses, so Chat Completions is the safe - // default. - for url in [ - "http://localhost:11434/v1", // Ollama - "http://127.0.0.1:8000/v1", // vLLM / llama.cpp - "https://openrouter.ai/api/v1", // OpenRouter - "https://gateway.block.example/v1", // Block Gateway - "https://my-vllm.k8s.example.com:443", // self-hosted vLLM - "not a url", // malformed → safe fallback + fn parse_openai_api_values() { + use OpenAiApi::*; + for (raw, want) in [ + (None, Ok(Auto)), + (Some("auto"), Ok(Auto)), + (Some(" AUTO "), Ok(Auto)), + (Some(""), Ok(Auto)), + (Some("chat"), Ok(Chat)), + (Some("chat-completions"), Ok(Chat)), + (Some("Responses"), Ok(Responses)), ] { - assert_eq!( - auto_openai_api(url), - OpenAiApi::ChatCompletions, - "expected Chat Completions for {url}" - ); + assert_eq!(parse_openai_api(raw), want, "raw={raw:?}"); } + let err = parse_openai_api(Some("nope")).unwrap_err(); + assert!(err.contains("OPENAI_COMPAT_API=nope"), "{err}"); } #[test] - fn parse_openai_api_unset_defaults_to_auto() { - assert_eq!( - parse_openai_api(None, "https://api.openai.com/v1").unwrap(), - OpenAiApi::Responses, - ); - assert_eq!( - parse_openai_api(None, "http://localhost:11434/v1").unwrap(), - OpenAiApi::ChatCompletions, - ); - } - - #[test] - fn parse_openai_api_accepts_explicit_values() { - let url = "http://example.invalid"; - assert_eq!( - parse_openai_api(Some("chat"), url).unwrap(), - OpenAiApi::ChatCompletions - ); - assert_eq!( - parse_openai_api(Some("chat-completions"), url).unwrap(), - OpenAiApi::ChatCompletions - ); - assert_eq!( - parse_openai_api(Some("RESPONSES"), url).unwrap(), - OpenAiApi::Responses - ); - assert_eq!( - parse_openai_api(Some(" auto "), "https://api.openai.com").unwrap(), - OpenAiApi::Responses - ); - } - - #[test] - fn parse_openai_api_rejects_garbage() { - let err = parse_openai_api(Some("nope"), "http://example.invalid").unwrap_err(); - assert!(err.contains("OPENAI_COMPAT_API=nope")); - } - - #[test] - fn auto_openai_api_does_not_match_lookalike_hosts() { - // Defense against host-suffix mistakes: api.openai.com.evil.com - // is NOT openai.com. - assert_eq!( - auto_openai_api("https://api.openai.com.evil.example/v1"), - OpenAiApi::ChatCompletions - ); + fn is_openai_host_matrix() { + // Lookalike-safe: `api.openai.com.evil.example` and malformed URLs + // are treated as non-OpenAI (which falls back to Chat Completions). + for (url, want) in [ + ("https://api.openai.com/v1", true), + ("https://api.openai.com", true), + ("http://eu.api.openai.com/v1", true), + ("http://localhost:11434/v1", false), + ("https://openrouter.ai/api/v1", false), + ("https://gateway.block.example/v1", false), + ("https://api.openai.com.evil.example/v1", false), + ("not a url", false), + ] { + assert_eq!(is_openai_host(url), want, "url={url}"); + } } } diff --git a/crates/sprout-agent/src/llm.rs b/crates/sprout-agent/src/llm.rs index a13beccb..4cd3442c 100644 --- a/crates/sprout-agent/src/llm.rs +++ b/crates/sprout-agent/src/llm.rs @@ -1,7 +1,9 @@ +use std::sync::atomic::{AtomicBool, Ordering}; + use reqwest::Client; use serde_json::{json, Value}; -use crate::config::{Config, OpenAiApi, Provider}; +use crate::config::{is_openai_host, Config, OpenAiApi, Provider}; use crate::types::{ AgentError, HistoryItem, LlmResponse, ProviderStop, ToolCall, ToolDef, ToolResultContent, }; @@ -9,15 +11,17 @@ use crate::types::{ const MAX_LLM_RESPONSE_BYTES: usize = 16 * 1024 * 1024; const MAX_LLM_ERROR_BODY_BYTES: usize = 4 * 1024; +/// Parser for an OpenAI-family JSON response. Per-endpoint pair lives +/// alongside its `_body` serializer. +type OpenAiParse = fn(Value) -> Result; + pub struct Llm { http: Client, - /// One-shot sticky flag: once we observe a "use /v1/responses" / - /// "Responses API required" error from a provider while - /// `cfg.openai_api_auto` is set, we flip this to `true` and route - /// every subsequent OpenAI request to the Responses endpoint for - /// the lifetime of the process. The agent loop will then retry the - /// failed turn, which succeeds the second time around. - auto_upgraded_to_responses: std::sync::atomic::AtomicBool, + /// One-shot sticky flag: set when a Chat Completions request comes + /// back with a "use /v1/responses" provider error while `cfg.openai_api + /// == Auto`. Subsequent OpenAI calls then go straight to Responses + /// for the lifetime of the process. + auto_upgraded: AtomicBool, } impl Llm { @@ -29,42 +33,10 @@ impl Llm { .map_err(|e| AgentError::Llm(format!("http: {e}")))?; Ok(Self { http, - auto_upgraded_to_responses: std::sync::atomic::AtomicBool::new(false), + auto_upgraded: AtomicBool::new(false), }) } - /// Effective OpenAI API for this call, accounting for one-shot - /// auto-upgrade. `Responses` if the operator pinned it OR if we've - /// already upgraded; otherwise `ChatCompletions`. - fn effective_openai_api(&self, cfg: &Config) -> OpenAiApi { - if cfg.openai_api == OpenAiApi::Responses - || self - .auto_upgraded_to_responses - .load(std::sync::atomic::Ordering::Relaxed) - { - OpenAiApi::Responses - } else { - OpenAiApi::ChatCompletions - } - } - - /// Should we look for a "use Responses API" hint in this error body? - /// Only when the operator left `OPENAI_COMPAT_API=auto` AND we haven't - /// already upgraded. - fn should_try_auto_upgrade(&self, cfg: &Config) -> bool { - cfg.openai_api_auto - && !self - .auto_upgraded_to_responses - .load(std::sync::atomic::Ordering::Relaxed) - } - - /// Latch the upgrade. Returns the previous value so the caller can log - /// once per process. - fn latch_responses_upgrade(&self) -> bool { - self.auto_upgraded_to_responses - .swap(true, std::sync::atomic::Ordering::Relaxed) - } - pub async fn complete( &self, cfg: &Config, @@ -73,94 +45,28 @@ impl Llm { ) -> Result { match cfg.provider { Provider::Anthropic => { - let body = anthropic_body(cfg, history, tools); - let url = format!("{}/v1/messages", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| { - r.header("x-api-key", &cfg.api_key) - .header("anthropic-version", &cfg.anthropic_api_version) - }) - .await?; + let v = self + .post_anthropic(cfg, &anthropic_body(cfg, history, tools)) + .await?; parse_anthropic(v) } - Provider::OpenAi => self.openai_complete(cfg, history, tools).await, - } - } - - /// OpenAI dispatch with one-shot auto-upgrade. When `cfg.openai_api_auto` - /// is set and a Chat Completions request comes back with a "use the - /// Responses API" signal from the provider, we latch a process-wide - /// upgrade and re-issue the call against `/v1/responses`. Subsequent - /// calls skip the chat attempt entirely. - async fn openai_complete( - &self, - cfg: &Config, - history: &[HistoryItem], - tools: &[ToolDef], - ) -> Result { - if self.effective_openai_api(cfg) == OpenAiApi::Responses { - return self.openai_responses(cfg, history, tools).await; - } - // Chat Completions, with possible upgrade-then-retry. - match self.openai_chat(cfg, history, tools).await { - Ok(r) => Ok(r), - Err(e) if self.try_upgrade_on_error(cfg, &e) => { - // Latched. Re-issue the same request on Responses. - self.openai_responses(cfg, history, tools).await + Provider::OpenAi => { + self.openai_request(cfg, |use_responses| { + if use_responses { + ( + responses_body(cfg, history, tools), + parse_responses as OpenAiParse, + ) + } else { + ( + openai_body(cfg, history, tools), + parse_openai as OpenAiParse, + ) + } + }) + .await } - Err(e) => Err(e), - } - } - - async fn openai_chat( - &self, - cfg: &Config, - history: &[HistoryItem], - tools: &[ToolDef], - ) -> Result { - let body = openai_body(cfg, history, tools); - let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - parse_openai(v) - } - - async fn openai_responses( - &self, - cfg: &Config, - history: &[HistoryItem], - tools: &[ToolDef], - ) -> Result { - let body = responses_body(cfg, history, tools); - let url = format!("{}/responses", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - parse_responses(v) - } - - /// Inspect a failed OpenAI Chat Completions error. If we're in `auto` - /// mode and the provider message asks us to use the Responses API, - /// latch the upgrade so subsequent requests go there. Returns `true` - /// iff we just upgraded (caller should re-issue once). - fn try_upgrade_on_error(&self, cfg: &Config, err: &AgentError) -> bool { - if !self.should_try_auto_upgrade(cfg) { - return false; - } - let body = match err { - AgentError::Llm(s) => s.as_str(), - // Auth errors are not "use the other endpoint" errors. - _ => return false, - }; - if !is_responses_required_error(body) { - return false; } - let already = self.latch_responses_upgrade(); - if !already { - tracing::warn!( - provider_message = body, - "openai chat-completions endpoint reported that this model requires \ - the Responses API; auto-upgrading subsequent OpenAI calls to /v1/responses \ - for the rest of this process" - ); - } - true } pub async fn summarize( @@ -181,62 +87,106 @@ impl Llm { "content": [{ "type": "text", "text": user_prompt }], }], }); - let url = format!("{}/v1/messages", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| { - r.header("x-api-key", &cfg.api_key) - .header("anthropic-version", &cfg.anthropic_api_version) - }) - .await?; - Ok(parse_anthropic(v)?.text) + Ok(parse_anthropic(self.post_anthropic(cfg, &body).await?)?.text) } Provider::OpenAi => { - self.openai_summarize(cfg, system_prompt, user_prompt, max_output_tokens) - .await + let r = self + .openai_request(cfg, |use_responses| { + if use_responses { + ( + json!({ + "model": cfg.model, + "max_output_tokens": max_output_tokens, + "instructions": system_prompt, + "input": user_prompt, + }), + parse_responses as OpenAiParse, + ) + } else { + ( + json!({ + "model": cfg.model, + "stream": false, + "max_completion_tokens": max_output_tokens, + "messages": [ + { "role": "system", "content": system_prompt }, + { "role": "user", "content": user_prompt }, + ], + }), + parse_openai as OpenAiParse, + ) + } + }) + .await?; + Ok(r.text) } } } - async fn openai_summarize( + async fn post_anthropic(&self, cfg: &Config, body: &Value) -> Result { + let url = format!("{}/v1/messages", cfg.base_url.trim_end_matches('/')); + post(&self.http, &url, body, |r| { + r.header("x-api-key", &cfg.api_key) + .header("anthropic-version", &cfg.anthropic_api_version) + }) + .await + } + + /// OpenAI dispatch: resolve endpoint (pinned > sticky-upgraded > auto by + /// host), POST, and on `auto` retry once on Responses if the provider + /// asks for it. `build` is called with `use_responses` so callers + /// only construct the body actually needed. + async fn openai_request(&self, cfg: &Config, mut build: F) -> Result + where + F: FnMut(bool) -> (Value, OpenAiParse) + Send, + { + let use_responses = self.auto_upgraded.load(Ordering::Relaxed) + || matches!(cfg.openai_api, OpenAiApi::Responses) + || matches!(cfg.openai_api, OpenAiApi::Auto) && is_openai_host(&cfg.base_url); + + if use_responses { + let (b, p) = build(true); + return p(self.post_openai(cfg, "/responses", &b).await?); + } + let (b, p) = build(false); + match self.post_openai(cfg, "/chat/completions", &b).await { + Ok(v) => p(v), + Err(e) if cfg.openai_api == OpenAiApi::Auto && self.try_upgrade(&e) => { + let (b, p) = build(true); + p(self.post_openai(cfg, "/responses", &b).await?) + } + Err(e) => Err(e), + } + } + + async fn post_openai( &self, cfg: &Config, - system_prompt: &str, - user_prompt: &str, - max_output_tokens: u32, - ) -> Result { - let chat = || async { - let body = json!({ - "model": cfg.model, - "stream": false, - "max_completion_tokens": max_output_tokens, - "messages": [ - { "role": "system", "content": system_prompt }, - { "role": "user", "content": user_prompt }, - ], - }); - let url = format!("{}/chat/completions", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - Ok::<_, AgentError>(parse_openai(v)?.text) - }; - let responses = || async { - let body = json!({ - "model": cfg.model, - "max_output_tokens": max_output_tokens, - "instructions": system_prompt, - "input": user_prompt, - }); - let url = format!("{}/responses", cfg.base_url.trim_end_matches('/')); - let v = post(&self.http, &url, &body, |r| r.bearer_auth(&cfg.api_key)).await?; - Ok::<_, AgentError>(parse_responses(v)?.text) - }; + path: &str, + body: &Value, + ) -> Result { + let url = format!("{}{}", cfg.base_url.trim_end_matches('/'), path); + post(&self.http, &url, body, |r| r.bearer_auth(&cfg.api_key)).await + } - if self.effective_openai_api(cfg) == OpenAiApi::Responses { - return responses().await; + /// If `err` names `/v1/responses` / "use the Responses API", latch a + /// sticky upgrade so subsequent OpenAI calls hit Responses. Logged once. + fn try_upgrade(&self, err: &AgentError) -> bool { + let body = match err { + AgentError::Llm(s) => s.as_str(), + _ => return false, // auth/transport aren't "use the other endpoint" signals + }; + if !is_responses_required_error(body) { + return false; } - match chat().await { - Ok(t) => Ok(t), - Err(e) if self.try_upgrade_on_error(cfg, &e) => responses().await, - Err(e) => Err(e), + if !self.auto_upgraded.swap(true, Ordering::Relaxed) { + tracing::warn!( + provider_message = body, + "openai: provider asked for the Responses API; \ + routing subsequent OpenAI calls to /v1/responses for this process" + ); } + true } } @@ -389,32 +339,12 @@ fn openai_image_user_content(content: &[ToolResultContent]) -> Vec { } // ── OpenAI Responses API ─────────────────────────────────────────────────── +// Spec: https://platform.openai.com/docs/api-reference/responses // -// Wire shape (model-facing, see https://platform.openai.com/docs/api-reference/responses): -// -// { -// "model": "...", -// "instructions": "", -// "max_output_tokens": N, -// "input": [, ...], -// "tools": [, ...] // flat schema, no nested `function: {…}` -// } -// -// `input_item` is one of: -// { "role": "user"|"assistant", "content": [{"type":"input_text"|"output_text", "text": …}] } -// { "type": "function_call", "call_id": …, "name": …, "arguments": "" } -// { "type": "function_call_output", "call_id": …, "output": "" } -// -// On replay (next turn) the prior assistant `function_call` items **must** -// precede their `function_call_output`s, otherwise the API rejects with -// "No tool call found for call_id ...". The serializer below emits them in -// the order they appear in `history`, which matches the order the agent -// loop produced them. -// -// Image tool results: Responses accepts image parts on user messages -// (`{type: "input_image", image_url: "data:..."}`), so we attach them on a -// trailing user message after the textual `function_call_output`, mirroring -// the Chat Completions branch. +// Replay invariant: each assistant `function_call` input item **must** +// precede its matching `function_call_output`, or the API rejects with +// "No tool call found for call_id ...". `HistoryItem` ordering already +// guarantees this. fn responses_body(cfg: &Config, history: &[HistoryItem], tools: &[ToolDef]) -> Value { let mut input: Vec = Vec::with_capacity(history.len()); @@ -447,9 +377,20 @@ fn responses_body(cfg: &Config, history: &[HistoryItem], tools: &[ToolDef]) -> V "call_id": r.provider_id, "output": openai_tool_text_content(&r.content), })); - let image_content = responses_image_user_content(&r.content); - if !image_content.is_empty() { - input.push(json!({ "role": "user", "content": image_content })); + // Responses takes images as `input_image` parts on a user message. + let images: Vec = r + .content + .iter() + .filter_map(|c| match c { + ToolResultContent::Image { data, mime_type } => Some(json!({ + "type": "input_image", + "image_url": format!("data:{mime_type};base64,{data}"), + })), + ToolResultContent::Text(_) => None, + }) + .collect(); + if !images.is_empty() { + input.push(json!({ "role": "user", "content": images })); } } } @@ -480,123 +421,89 @@ fn responses_body(cfg: &Config, history: &[HistoryItem], tools: &[ToolDef]) -> V body } -/// Detect an "endpoint mismatch" signal in a provider error body. The -/// match is intentionally narrow — we only act on phrases that explicitly -/// name `/v1/responses` or the Responses API, so we don't get fooled by -/// generic 4xx bodies that happen to mention those terms. -/// -/// Known signals: the literal string `/v1/responses` (e.g. the Databricks -/// GPT-5.5 message: "Function tools with reasoning_effort are not supported -/// for gpt-5.5 in /v1/chat/completions. Please use /v1/responses instead."); -/// or the prose phrases "use the Responses API" / "Responses API instead" -/// as a forward-compat slot for OpenAI proper saying the same thing without -/// the URL. +/// Narrow matcher for "you should be on the Responses API" provider errors, +/// the signal we use to auto-upgrade. Triggers on the literal path +/// `/v1/responses` (Databricks GPT-5.5 phrasing) or the prose +/// "use the Responses API" / "Responses API instead". fn is_responses_required_error(body: &str) -> bool { - // Lower-case once; the body is already capped at MAX_LLM_ERROR_BODY_BYTES. let b = body.to_ascii_lowercase(); b.contains("/v1/responses") || b.contains("responses api instead") || b.contains("use the responses api") } -fn responses_image_user_content(content: &[ToolResultContent]) -> Vec { - content - .iter() - .filter_map(|c| match c { - ToolResultContent::Image { data, mime_type } => Some(json!({ - "type": "input_image", - "image_url": format!("data:{mime_type};base64,{data}"), - })), - ToolResultContent::Text(_) => None, - }) - .collect() -} - fn parse_responses(v: Value) -> Result { let mut text = String::new(); let mut tool_calls = Vec::new(); let mut saw_function_call = false; - if let Some(items) = v.get("output").and_then(Value::as_array) { - for item in items { - match item.get("type").and_then(Value::as_str) { - Some("message") => { - if let Some(parts) = item.get("content").and_then(Value::as_array) { - for p in parts { - // Responses uses "output_text" for assistant text. - // Also accept "text" as a forward-compat fallback. - if matches!( - p.get("type").and_then(Value::as_str), - Some("output_text" | "text") - ) { - if let Some(t) = p.get("text").and_then(Value::as_str) { - text.push_str(t); - } - } + for item in v + .get("output") + .and_then(Value::as_array) + .into_iter() + .flatten() + { + match item.get("type").and_then(Value::as_str) { + Some("message") => { + for p in item + .get("content") + .and_then(Value::as_array) + .into_iter() + .flatten() + { + // Responses emits "output_text"; accept "text" forward-compat. + if matches!( + p.get("type").and_then(Value::as_str), + Some("output_text" | "text") + ) { + if let Some(t) = p.get("text").and_then(Value::as_str) { + text.push_str(t); } } } - Some("function_call") => { - saw_function_call = true; - let raw = item - .get("arguments") - .and_then(Value::as_str) - .unwrap_or("{}"); - let args: Value = serde_json::from_str(raw).map_err(|e| { - AgentError::Llm(format!("function_call.arguments not valid JSON: {e}")) - })?; - tool_calls.push(make_tool_call( - str_field(item, "call_id"), - str_field(item, "name"), - args, - )?); - } - // Reasoning items are model-internal. Sprout's flow is - // stateless across turns and we have no use for the (opaque) - // reasoning summary, so we skip them. - Some("reasoning") => {} - // Unknown item types are ignored for forward compatibility. - _ => {} } + Some("function_call") => { + saw_function_call = true; + let raw = item + .get("arguments") + .and_then(Value::as_str) + .unwrap_or("{}"); + let args: Value = serde_json::from_str(raw).map_err(|e| { + AgentError::Llm(format!("function_call.arguments not valid JSON: {e}")) + })?; + tool_calls.push(make_tool_call( + str_field(item, "call_id"), + str_field(item, "name"), + args, + )?); + } + // Reasoning items are opaque/internal; we don't replay them. + // Unknown types ignored for forward-compat. + _ => {} } } - let stop = responses_stop(&v, saw_function_call); - Ok(LlmResponse { - text, - tool_calls, - stop, - }) -} - -/// Map a Responses API result to our `ProviderStop`. -/// -/// Status `incomplete` with `reason == "max_output_tokens"` → `MaxTokens`. -/// Status `completed` with one or more `function_call` items → `ToolUse`. -/// Status `completed` otherwise → `EndTurn`. Anything else (`failed`, -/// `cancelled`, …) → `Other`. -fn responses_stop(v: &Value, saw_function_call: bool) -> ProviderStop { - match v.get("status").and_then(Value::as_str) { + let stop = match v.get("status").and_then(Value::as_str) { Some("incomplete") => { let reason = v .get("incomplete_details") .and_then(|d| d.get("reason")) .and_then(Value::as_str); - if matches!(reason, Some("max_output_tokens")) { + if reason == Some("max_output_tokens") { ProviderStop::MaxTokens } else { ProviderStop::Other } } - Some("completed") => { - if saw_function_call { - ProviderStop::ToolUse - } else { - ProviderStop::EndTurn - } - } + Some("completed") if saw_function_call => ProviderStop::ToolUse, + Some("completed") => ProviderStop::EndTurn, _ => ProviderStop::Other, - } + }; + Ok(LlmResponse { + text, + tool_calls, + stop, + }) } fn map_stop(s: Option<&str>) -> ProviderStop { @@ -828,8 +735,7 @@ mod tests { model: "model".into(), base_url: "http://example.invalid".into(), anthropic_api_version: "2023-06-01".into(), - openai_api: OpenAiApi::ChatCompletions, - openai_api_auto: false, + openai_api: OpenAiApi::Chat, } } @@ -1053,37 +959,19 @@ mod tests { } #[test] - fn is_responses_required_error_matches_databricks_signal() { - // The exact body shape we saw from Databricks (paraphrased; the - // signal is the URL path mention). - let body = "{\"error_code\":\"BAD_REQUEST\",\"message\":\ - \"Function tools with reasoning_effort are not supported \ - for gpt-5.5 in /v1/chat/completions. Please use \ - /v1/responses instead.\"}"; - assert!(is_responses_required_error(body)); - } - - #[test] - fn is_responses_required_error_matches_openai_prose() { - // Forward-compat slot for OpenAI proper phrasing this without - // the URL. - assert!(is_responses_required_error( - "This model requires the Responses API. Please use the Responses API instead." - )); - assert!(is_responses_required_error( - "ERROR: use the Responses API for this model." - )); - } - - #[test] - fn is_responses_required_error_ignores_unrelated_4xx() { - assert!(!is_responses_required_error( - "{\"error\":\"invalid_api_key\",\"message\":\"Incorrect API key provided\"}" - )); - assert!(!is_responses_required_error( - "{\"error\":\"unsupported_parameter\",\"message\":\"max_tokens is not supported with this model\"}" - )); - assert!(!is_responses_required_error("")); + fn is_responses_required_error_matrix() { + for (body, want) in [ + // Databricks GPT-5.5 (the actual case we observed). + ("Function tools with reasoning_effort are not supported for gpt-5.5 in /v1/chat/completions. Please use /v1/responses instead.", true), + // Forward-compat: OpenAI saying the same thing in prose. + ("This model requires the Responses API. Please use the Responses API instead.", true), + // Negatives — must NOT trigger on unrelated 4xx. + ("{\"error\":\"invalid_api_key\"}", false), + ("max_tokens is not supported with this model", false), + ("", false), + ] { + assert_eq!(is_responses_required_error(body), want, "body={body:?}"); + } } #[test]