Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 92 additions & 32 deletions crates/goose/src/providers/formats/databricks.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::conversation::message::{Message, MessageContent};
use crate::model::ModelConfig;
use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType};
use crate::providers::utils::{
convert_image, detect_image_path, is_valid_function_name, load_image_file, safely_parse_json,
sanitize_function_name, ImageFormat,
Expand Down Expand Up @@ -231,6 +232,50 @@ fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec<Data
result
}

fn apply_claude_thinking_config(payload: &mut Value, model_config: &ModelConfig) {
let obj = payload.as_object_mut().unwrap();

match thinking_type(model_config) {
ThinkingType::Adaptive => {
obj.insert("thinking".to_string(), json!({ "type": "adaptive" }));
obj.insert(
"output_config".to_string(),
json!({ "effort": thinking_effort(model_config).to_string() }),
);
obj.insert(
"max_completion_tokens".to_string(),
json!(model_config.max_output_tokens()),
);
}
ThinkingType::Enabled => {
let budget_tokens = model_config
.get_config_param::<i32>("budget_tokens", "CLAUDE_THINKING_BUDGET")
.unwrap_or(16000)
.max(1024);

let max_tokens = model_config.max_output_tokens() + budget_tokens;
obj.insert("max_tokens".to_string(), json!(max_tokens));
obj.insert(
"thinking".to_string(),
json!({
"type": "enabled",
"budget_tokens": budget_tokens
}),
);
obj.insert("temperature".to_string(), json!(2));
}
ThinkingType::Disabled => {
if let Some(temp) = model_config.temperature {
obj.insert("temperature".to_string(), json!(temp));
}
obj.insert(
"max_completion_tokens".to_string(),
json!(model_config.max_output_tokens()),
);
}
}
}

pub fn format_tools(tools: &[Tool], model_name: &str) -> anyhow::Result<Vec<Value>> {
let mut tool_names = std::collections::HashSet::new();
let mut result = Vec::new();
Expand Down Expand Up @@ -540,11 +585,7 @@ pub fn create_request(
));
}

let model_name = model_config.model_name.to_string();
let is_openai_reasoning_model = model_config.is_openai_reasoning_model();
let is_claude_sonnet =
model_name.contains("claude-3-7-sonnet") || model_name.contains("claude-4-sonnet"); // can be goose- or databricks-

let (model_name, reasoning_effort) = if is_openai_reasoning_model {
let parts: Vec<&str> = model_config.model_name.split('-').collect();
let last_part = parts.last().unwrap();
Expand Down Expand Up @@ -602,34 +643,8 @@ pub fn create_request(
.insert("tools".to_string(), json!(tools_spec));
}

let is_thinking_enabled = std::env::var("CLAUDE_THINKING_ENABLED").is_ok();
if is_claude_sonnet && is_thinking_enabled {
// Anthropic requires budget_tokens >= 1024
const DEFAULT_THINKING_BUDGET: i32 = 16000;
let budget_tokens: i32 = std::env::var("CLAUDE_THINKING_BUDGET")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(DEFAULT_THINKING_BUDGET);

// With thinking enabled, max_tokens must include both output and thinking budget
let max_tokens = model_config.max_output_tokens() + budget_tokens;
payload
.as_object_mut()
.unwrap()
.insert("max_tokens".to_string(), json!(max_tokens));

payload.as_object_mut().unwrap().insert(
"thinking".to_string(),
json!({
"type": "enabled",
"budget_tokens": budget_tokens
}),
);

payload
.as_object_mut()
.unwrap()
.insert("temperature".to_string(), json!(2));
if is_claude_model(&model_config.model_name) {
apply_claude_thinking_config(&mut payload, model_config);
} else {
// open ai reasoning models currently don't support temperature
if !is_openai_reasoning_model {
Expand Down Expand Up @@ -1062,6 +1077,51 @@ mod tests {
Ok(())
}

#[test]
fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
let _guard = env_lock::lock_env([
("CLAUDE_THINKING_TYPE", Some("adaptive")),
("CLAUDE_THINKING_EFFORT", Some("low")),
("CLAUDE_THINKING_ENABLED", None::<&str>),
("CLAUDE_THINKING_BUDGET", None::<&str>),
]);

let mut model_config = ModelConfig::new_or_fail("databricks-claude-opus-4-6");
model_config.max_tokens = Some(4096);

let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;

assert_eq!(request["thinking"]["type"], "adaptive");
assert_eq!(request["output_config"]["effort"], "low");
assert!(request.get("temperature").is_none());
assert_eq!(request["max_completion_tokens"], 4096);
assert!(request.get("max_tokens").is_none());

Ok(())
}

#[test]
fn test_create_request_enabled_thinking_with_budget() -> anyhow::Result<()> {
let _guard = env_lock::lock_env([
("CLAUDE_THINKING_TYPE", None::<&str>),
("CLAUDE_THINKING_ENABLED", Some("1")),
("CLAUDE_THINKING_BUDGET", Some("10000")),
]);

let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet");
model_config.max_tokens = Some(4096);

let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;

assert_eq!(request["thinking"]["type"], "enabled");
assert_eq!(request["thinking"]["budget_tokens"], 10000);
assert_eq!(request["max_tokens"], 14096);
assert_eq!(request["temperature"], 2);
assert!(request.get("max_completion_tokens").is_none());

Ok(())
}

#[test]
fn test_response_to_message_claude_thinking() -> anyhow::Result<()> {
let response = json!({
Expand Down
2 changes: 1 addition & 1 deletion documentation/docs/getting-started/providers.md
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,7 @@ Some models expose their internal reasoning or "chain of thought" as part of the
| **DeepSeek-R1** (via OpenAI, Ollama, OpenRouter, OVHcloud, etc.) | Reasoning captured from the `reasoning_content` field in the API response |
| **Kimi** (via Groq or other OpenAI-compatible endpoints) | Reasoning captured from the `reasoning_content` field in the API response |
| **Gemini CLI** (Google Gemini models with thinking enabled) | Thinking blocks captured from the streaming response |
| **Claude** (Anthropic, with [extended thinking](/docs/guides/environment-variables#claude-extended-thinking) enabled) | Thinking blocks captured from the API response |
| **Claude** (Anthropic, with [Claude thinking](/docs/guides/environment-variables#claude-thinking-configuration) enabled) | Thinking blocks captured from the API response |

<Tabs groupId="interface">
<TabItem value="ui" label="goose Desktop" default>
Expand Down
27 changes: 15 additions & 12 deletions documentation/docs/guides/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,28 +164,32 @@ export GOOSE_LEAD_FAILURE_THRESHOLD=3
export GOOSE_LEAD_FALLBACK_TURNS=2
```

### Claude Extended Thinking
### Claude Thinking Configuration

These variables control Claude's [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) feature, which allows the model to reason through complex problems before generating a response. Supported on Anthropic and Databricks providers.
These variables control Claude's reasoning behavior. Supported on Anthropic and Databricks providers.

| Variable | Purpose | Values | Default |
|----------|---------|---------|---------|
| `CLAUDE_THINKING_ENABLED` | Enables extended thinking for Claude models | Set to any value to enable | Disabled |
| `CLAUDE_THINKING_BUDGET` | Maximum tokens allocated for Claude's internal reasoning process | Positive integer (minimum 1024) | 16000 |
| `CLAUDE_THINKING_TYPE` | Controls Claude reasoning mode | `adaptive`, `enabled`, `disabled` | `adaptive` for Claude 4.6+ models, otherwise `disabled` |
| `CLAUDE_THINKING_BUDGET` | Maximum tokens allocated for Claude's internal reasoning process when `CLAUDE_THINKING_TYPE=enabled` | Positive integer (minimum 1024) | 16000 |

**Examples**

```bash
# Enable extended thinking with default budget (16000 tokens)
export CLAUDE_THINKING_ENABLED=1
# Claude 4.6 adaptive thinking
export GOOSE_PROVIDER=anthropic
export GOOSE_MODEL=claude-sonnet-4-6
export CLAUDE_THINKING_TYPE=adaptive

# Enable with a larger budget for complex tasks
export CLAUDE_THINKING_ENABLED=1
# Explicit extended thinking with the default budget
export CLAUDE_THINKING_TYPE=enabled

# Explicit extended thinking with a larger budget for complex tasks
export CLAUDE_THINKING_TYPE=enabled
export CLAUDE_THINKING_BUDGET=32000

# Enable with a smaller budget for faster responses
export CLAUDE_THINKING_ENABLED=1
export CLAUDE_THINKING_BUDGET=8000
# Disable Claude thinking entirely
export CLAUDE_THINKING_TYPE=disabled
```

:::tip Viewing Thinking Output
Expand Down Expand Up @@ -734,4 +738,3 @@ When deploying goose in enterprise environments, administrators might need to co
- For security-sensitive variables (like API keys), consider using the system keyring instead of environment variables.
- Some variables may require restarting goose to take effect.
- When using the planning mode, if planner-specific variables are not set, goose will fall back to the main model configuration.

Loading