From c297b3fe42920ed70842d63af6d35c8e613909b6 Mon Sep 17 00:00:00 2001 From: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> Date: Thu, 21 May 2026 12:42:22 -0400 Subject: [PATCH] feat(sprout-agent): fall back to Databricks OAuth when API keys are missing Signed-off-by: tlongwell-block <109685178+tlongwell-block@users.noreply.github.com> --- Cargo.lock | 1 + crates/sprout-agent/Cargo.toml | 1 + crates/sprout-agent/README.md | 18 +- crates/sprout-agent/src/config.rs | 296 +++++++++++++++++++++++++++++- 4 files changed, 304 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 867aadd62..ab8480cbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3914,6 +3914,7 @@ dependencies = [ "rmcp", "serde", "serde_json", + "serde_yaml", "sha2 0.11.0", "tempfile", "tokio", diff --git a/crates/sprout-agent/Cargo.toml b/crates/sprout-agent/Cargo.toml index 1c2b66cf5..9951556a5 100644 --- a/crates/sprout-agent/Cargo.toml +++ b/crates/sprout-agent/Cargo.toml @@ -28,6 +28,7 @@ path = "tests/bin/fake_mcp.rs" tokio = { workspace = true, features = ["rt-multi-thread", "macros", "io-std", "io-util", "sync", "process", "time", "net"] } serde = { workspace = true } serde_json = { workspace = true } +serde_yaml = { workspace = true } reqwest = { workspace = true, features = ["json", "rustls", "form"] } rmcp = { version = "1", default-features = false, features = ["client", "transport-child-process"] } arc-swap = "1" diff --git a/crates/sprout-agent/README.md b/crates/sprout-agent/README.md index 5a6c2cf7c..7e23ee0ef 100644 --- a/crates/sprout-agent/README.md +++ b/crates/sprout-agent/README.md @@ -49,6 +49,12 @@ OPENAI_COMPAT_API_KEY=sk-... \ OPENAI_COMPAT_MODEL=gpt-5 \ OPENAI_COMPAT_BASE_URL=https://api.openai.com/v1 \ ./target/release/sprout-agent + +# Or Databricks model serving via OAuth 2.0 PKCE +SPROUT_AGENT_PROVIDER=databricks \ +DATABRICKS_HOST=https://dbc-...cloud.databricks.com \ +DATABRICKS_MODEL=goose-claude-4-6-sonnet \ + ./target/release/sprout-agent ``` That's the whole setup. The agent reads JSON-RPC frames from stdin, writes them to stdout, and logs to stderr. @@ -123,15 +129,18 @@ Everything is environment variables. No flags, no config files. (We are a subpro | Variable | Default | Notes | |---|---|---| -| `SPROUT_AGENT_PROVIDER` | — | Required. `anthropic` or `openai`. | -| `ANTHROPIC_API_KEY` | — | Required when provider=anthropic. | +| `SPROUT_AGENT_PROVIDER` | — | `anthropic`, `openai`, or `databricks`. If unset, or if `anthropic`/`openai` is selected but its API key is missing, Databricks is auto-selected when `DATABRICKS_HOST` + `DATABRICKS_MODEL` are set. | +| `ANTHROPIC_API_KEY` | — | Required when provider=anthropic unless Databricks fallback is configured. | | `ANTHROPIC_MODEL` | — | Required when provider=anthropic. | | `ANTHROPIC_BASE_URL` | `https://api.anthropic.com` | | | `ANTHROPIC_API_VERSION` | `2023-06-01` | | -| `OPENAI_COMPAT_API_KEY` | — | Required when provider=openai. | +| `OPENAI_COMPAT_API_KEY` | — | Required when provider=openai unless Databricks fallback is configured. | | `OPENAI_COMPAT_MODEL` | — | Required when provider=openai. | | `OPENAI_COMPAT_BASE_URL` | `https://api.openai.com/v1` | Point at vLLM, llama.cpp, OpenRouter, Ollama, etc. | | `OPENAI_COMPAT_API` | `auto` | `auto` \| `chat` \| `responses`. `auto` picks Responses for `*.openai.com`, Chat Completions everywhere else. | +| `DATABRICKS_HOST` | goose config | Required when provider=databricks or when using Databricks fallback. If unset, read from goose's `~/.config/goose/config.yaml`. | +| `DATABRICKS_MODEL` | goose config | Required when provider=databricks or when using Databricks fallback. If unset, uses `DATABRICKS_MODEL` from goose config, or `GOOSE_MODEL`/`GOOSE_MODE` when `GOOSE_PROVIDER=databricks`. | +| `DATABRICKS_TOKEN` | — | Optional static bearer escape hatch. If unset, Databricks uses browser OAuth + refresh cache. | | `SPROUT_AGENT_SYSTEM_PROMPT` | built-in | Inline system prompt. | | `SPROUT_AGENT_SYSTEM_PROMPT_FILE` | — | File path. Mutually exclusive with the above. | | `SPROUT_AGENT_MAX_ROUNDS` | `0` | Tool-loop iteration cap. 0 = unlimited. | @@ -157,6 +166,9 @@ Everything is environment variables. No flags, no config files. (We are a subpro | Ollama | `openai` | `POST {base}/chat/completions` | llama3.1, qwen2.5-coder | | OpenRouter | `openai` | `POST {base}/chat/completions` | anything they route | | Block Gateway | `openai` | `POST {base}/chat/completions` | gpt-5, claude | +| Databricks | `databricks` | `POST {host}/serving-endpoints/{model}/invocations` | goose-claude-4-6-sonnet | + +If `SPROUT_AGENT_PROVIDER=anthropic` is selected without `ANTHROPIC_API_KEY`, or `SPROUT_AGENT_PROVIDER=openai` is selected without `OPENAI_COMPAT_API_KEY`, the agent automatically falls back to Databricks OAuth when Databricks host/model config is available. The same Databricks fallback applies when `SPROUT_AGENT_PROVIDER` is unset. Host/model can come from env or from goose's config file; explicit Anthropic/OpenAI API keys always win. `provider=openai` speaks two HTTP dialects: the [Responses API](https://platform.openai.com/docs/api-reference/responses) (`/v1/responses`, required for GPT-5 / o-series tool-calling on OpenAI's own service) and the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat) (`/chat/completions`, the broadly-supported OpenAI-compatible wire format). diff --git a/crates/sprout-agent/src/config.rs b/crates/sprout-agent/src/config.rs index 97ec5ecb1..af7d30459 100644 --- a/crates/sprout-agent/src/config.rs +++ b/crates/sprout-agent/src/config.rs @@ -1,4 +1,4 @@ -use std::time::Duration; +use std::{collections::HashMap, path::PathBuf, time::Duration}; pub const PROTOCOL_VERSION: u32 = 1; @@ -79,12 +79,16 @@ pub struct Config { impl Config { pub fn from_env() -> Result { - let provider = match req("SPROUT_AGENT_PROVIDER")?.to_ascii_lowercase().as_str() { - "anthropic" => Provider::Anthropic, - "openai" | "openai-compat" => Provider::OpenAi, - "databricks" => Provider::Databricks, - o => return Err(format!("config: SPROUT_AGENT_PROVIDER={o} not supported")), - }; + let goose_databricks = GooseDatabricksConfig::load_default(); + let databricks_host = env("DATABRICKS_HOST").or_else(|| goose_databricks.host.clone()); + let databricks_model = env("DATABRICKS_MODEL").or_else(|| goose_databricks.model.clone()); + let provider = resolve_provider( + env("SPROUT_AGENT_PROVIDER").as_deref(), + env("ANTHROPIC_API_KEY").as_deref(), + env("OPENAI_COMPAT_API_KEY").as_deref(), + databricks_host.as_deref(), + databricks_model.as_deref(), + )?; // OPENAI_COMPAT_API is only read when provider=openai, so a stray // bad value can't break an Anthropic-only deployment. // @@ -106,8 +110,12 @@ impl Config { ), Provider::Databricks => ( env("DATABRICKS_TOKEN").unwrap_or_default(), - req("DATABRICKS_MODEL")?, - req("DATABRICKS_HOST")?, + databricks_model.ok_or_else(|| { + "config: DATABRICKS_MODEL required (or set GOOSE_MODEL in goose config with GOOSE_PROVIDER=databricks)".to_string() + })?, + databricks_host.ok_or_else(|| { + "config: DATABRICKS_HOST required (or set DATABRICKS_HOST in goose config)".to_string() + })?, OpenAiApi::Chat, // Databricks invocations is chat-shaped ), }; @@ -216,6 +224,121 @@ fn req(k: &str) -> Result { env(k).ok_or_else(|| format!("config: {k} required")) } +#[derive(Default)] +struct GooseDatabricksConfig { + host: Option, + model: Option, +} + +impl GooseDatabricksConfig { + fn load_default() -> Self { + goose_config_path() + .and_then(|p| Self::load_from_path(&p)) + .unwrap_or_default() + } + + fn load_from_path(path: &std::path::Path) -> Option { + let raw = std::fs::read_to_string(path).ok()?; + let map: HashMap = serde_yaml::from_str(&raw).ok()?; + Some(Self::from_map(&map)) + } + + fn from_map(map: &HashMap) -> Self { + let host = yaml_string(map, "DATABRICKS_HOST"); + let explicit_model = yaml_string(map, "DATABRICKS_MODEL"); + let goose_provider = yaml_string(map, "GOOSE_PROVIDER"); + let goose_model = yaml_string(map, "GOOSE_MODEL"); + let goose_mode = yaml_string(map, "GOOSE_MODE"); + let model = explicit_model.or_else(|| { + if goose_provider + .as_deref() + .is_some_and(|p| p.eq_ignore_ascii_case("databricks")) + { + goose_model.or(goose_mode) + } else { + None + } + }); + Self { host, model } + } +} + +fn yaml_string(map: &HashMap, key: &str) -> Option { + map.get(key)? + .as_str() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) +} + +fn goose_config_path() -> Option { + if let Ok(root) = std::env::var("GOOSE_PATH_ROOT") { + return Some(PathBuf::from(root).join("config").join("config.yaml")); + } + let home = std::env::var("HOME").ok()?; + Some( + PathBuf::from(home) + .join(".config") + .join("goose") + .join("config.yaml"), + ) +} + +fn present_nonempty(v: Option<&str>) -> bool { + v.map(str::trim).is_some_and(|s| !s.is_empty()) +} + +fn databricks_available(host: Option<&str>, model: Option<&str>) -> bool { + present_nonempty(host) && present_nonempty(model) +} + +fn resolve_provider( + requested: Option<&str>, + anthropic_key: Option<&str>, + openai_key: Option<&str>, + databricks_host: Option<&str>, + databricks_model: Option<&str>, +) -> Result { + let databricks_ready = databricks_available(databricks_host, databricks_model); + match requested.map(str::trim).filter(|s| !s.is_empty()) { + Some(raw) => { + let normalized = raw.to_ascii_lowercase(); + match normalized.as_str() { + "anthropic" if present_nonempty(anthropic_key) => Ok(Provider::Anthropic), + "anthropic" if databricks_ready => { + tracing::warn!( + requested = raw, + "API key missing for requested provider; falling back to Databricks OAuth" + ); + Ok(Provider::Databricks) + } + "anthropic" => Err( + "config: ANTHROPIC_API_KEY required (or set DATABRICKS_HOST and DATABRICKS_MODEL for Databricks OAuth fallback)".into(), + ), + "openai" | "openai-compat" if present_nonempty(openai_key) => Ok(Provider::OpenAi), + "openai" | "openai-compat" if databricks_ready => { + tracing::warn!( + requested = raw, + "API key missing for requested provider; falling back to Databricks OAuth" + ); + Ok(Provider::Databricks) + } + "openai" | "openai-compat" => Err( + "config: OPENAI_COMPAT_API_KEY required (or set DATABRICKS_HOST and DATABRICKS_MODEL for Databricks OAuth fallback)".into(), + ), + "databricks" => Ok(Provider::Databricks), + _ => Err(format!( + "config: SPROUT_AGENT_PROVIDER={raw} not supported" + )), + } + } + None if databricks_ready => Ok(Provider::Databricks), + None => Err( + "config: SPROUT_AGENT_PROVIDER required (or set DATABRICKS_HOST and DATABRICKS_MODEL for Databricks OAuth fallback)".into(), + ), + } +} + /// Parse `OPENAI_COMPAT_API`. Pure (env-free) for testability; the /// caller hands in the raw value. fn parse_openai_api(raw: Option<&str>) -> Result { @@ -419,6 +542,161 @@ mod tests { assert!(err.contains("OPENAI_COMPAT_API=nope"), "{err}"); } + #[test] + fn goose_databricks_config_reads_host_and_model() { + let map = HashMap::from([ + ( + "DATABRICKS_HOST".to_string(), + serde_yaml::Value::String("https://dbc.example".into()), + ), + ( + "GOOSE_PROVIDER".to_string(), + serde_yaml::Value::String("databricks".into()), + ), + ( + "GOOSE_MODEL".to_string(), + serde_yaml::Value::String("goose-claude-4-6-sonnet".into()), + ), + ]); + let cfg = GooseDatabricksConfig::from_map(&map); + assert_eq!(cfg.host.as_deref(), Some("https://dbc.example")); + assert_eq!(cfg.model.as_deref(), Some("goose-claude-4-6-sonnet")); + } + + #[test] + fn goose_databricks_config_prefers_explicit_databricks_model() { + let map = HashMap::from([ + ( + "DATABRICKS_HOST".to_string(), + serde_yaml::Value::String("https://dbc.example".into()), + ), + ( + "DATABRICKS_MODEL".to_string(), + serde_yaml::Value::String("explicit-db-model".into()), + ), + ( + "GOOSE_PROVIDER".to_string(), + serde_yaml::Value::String("databricks".into()), + ), + ( + "GOOSE_MODEL".to_string(), + serde_yaml::Value::String("goose-model".into()), + ), + ]); + let cfg = GooseDatabricksConfig::from_map(&map); + assert_eq!(cfg.model.as_deref(), Some("explicit-db-model")); + } + + #[test] + fn goose_databricks_config_ignores_goose_model_for_other_provider() { + let map = HashMap::from([ + ( + "DATABRICKS_HOST".to_string(), + serde_yaml::Value::String("https://dbc.example".into()), + ), + ( + "GOOSE_PROVIDER".to_string(), + serde_yaml::Value::String("anthropic".into()), + ), + ( + "GOOSE_MODEL".to_string(), + serde_yaml::Value::String("claude".into()), + ), + ]); + let cfg = GooseDatabricksConfig::from_map(&map); + assert_eq!(cfg.host.as_deref(), Some("https://dbc.example")); + assert!(cfg.model.is_none()); + } + + #[test] + fn resolve_provider_keeps_requested_provider_when_token_present() { + assert_eq!( + resolve_provider( + Some("anthropic"), + Some("sk-ant"), + None, + Some("https://dbc.example"), + Some("db-model") + ) + .unwrap(), + Provider::Anthropic + ); + assert_eq!( + resolve_provider( + Some("openai"), + None, + Some("sk-openai"), + Some("https://dbc.example"), + Some("db-model") + ) + .unwrap(), + Provider::OpenAi + ); + } + + #[test] + fn resolve_provider_falls_back_to_databricks_when_requested_token_missing() { + assert_eq!( + resolve_provider( + Some("anthropic"), + None, + None, + Some("https://dbc.example"), + Some("goose-claude-4-6-sonnet") + ) + .unwrap(), + Provider::Databricks + ); + assert_eq!( + resolve_provider( + Some("openai-compat"), + None, + Some(" "), + Some("https://dbc.example"), + Some("goose-claude-4-6-sonnet") + ) + .unwrap(), + Provider::Databricks + ); + } + + #[test] + fn resolve_provider_can_auto_select_databricks_without_explicit_provider() { + assert_eq!( + resolve_provider( + None, + None, + None, + Some("https://dbc.example"), + Some("goose-claude-4-6-sonnet") + ) + .unwrap(), + Provider::Databricks + ); + } + + #[test] + fn resolve_provider_requires_databricks_host_and_model_for_fallback() { + let err = resolve_provider( + Some("openai"), + None, + None, + Some("https://dbc.example"), + None, + ) + .unwrap_err(); + assert!(err.contains("OPENAI_COMPAT_API_KEY required")); + let err = + resolve_provider(None, None, None, Some("https://dbc.example"), None).unwrap_err(); + assert!(err.contains("SPROUT_AGENT_PROVIDER required")); + } + + #[test] + fn resolve_provider_unsupported_error_preserves_user_casing() { + let err = resolve_provider(Some("OpenAIish"), None, None, None, None).unwrap_err(); + assert!(err.contains("SPROUT_AGENT_PROVIDER=OpenAIish")); + } + #[test] fn is_openai_host_matrix() { // Lookalike-safe: `api.openai.com.evil.example` and malformed URLs