Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion codex-rs/core/src/chat_completions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use crate::util::backoff;
pub(crate) async fn stream_chat_completions(
prompt: &Prompt,
model: &str,
include_plan_tool: bool,
client: &reqwest::Client,
provider: &ModelProviderInfo,
) -> Result<ResponseStream> {
Expand Down Expand Up @@ -105,7 +106,7 @@ pub(crate) async fn stream_chat_completions(
}
}

let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
let tools_json = create_tools_json_for_chat_completions_api(prompt, model, include_plan_tool)?;
let payload = json!({
"model": model,
"messages": messages,
Expand Down
7 changes: 6 additions & 1 deletion codex-rs/core/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ impl ModelClient {
let response_stream = stream_chat_completions(
prompt,
&self.config.model,
self.config.include_plan_tool,
&self.client,
&self.provider,
)
Expand Down Expand Up @@ -115,7 +116,11 @@ impl ModelClient {
}

let full_instructions = prompt.get_full_instructions(&self.config.model);
let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?;
let tools_json = create_tools_json_for_responses_api(
prompt,
&self.config.model,
self.config.include_plan_tool,
)?;
let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary);

// Request encrypted COT if we are not storing responses,
Expand Down
2 changes: 2 additions & 0 deletions codex-rs/core/src/codex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ use crate::models::ReasoningItemReasoningSummary;
use crate::models::ResponseInputItem;
use crate::models::ResponseItem;
use crate::models::ShellToolCallParams;
use crate::plan_tool::handle_update_plan;
use crate::project_doc::get_user_instructions;
use crate::protocol::AgentMessageDeltaEvent;
use crate::protocol::AgentMessageEvent;
Expand Down Expand Up @@ -1336,6 +1337,7 @@ async fn handle_function_call(
};
handle_container_exec_with_params(params, sess, sub_id, call_id).await
}
"update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
_ => {
match sess.mcp_connection_manager.parse_tool_name(&name) {
Some((server, tool_name)) => {
Expand Down
66 changes: 56 additions & 10 deletions codex-rs/core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ pub struct Config {

/// Experimental rollout resume path (absolute path to .jsonl; undocumented).
pub experimental_resume: Option<PathBuf>,

/// Include an experimental plan tool that the model can use to update its current plan and status of each step.
pub include_plan_tool: bool,
}

impl Config {
Expand Down Expand Up @@ -366,6 +369,7 @@ pub struct ConfigOverrides {
pub config_profile: Option<String>,
pub codex_linux_sandbox_exe: Option<PathBuf>,
pub base_instructions: Option<String>,
pub include_plan_tool: Option<bool>,
}

impl Config {
Expand All @@ -388,6 +392,7 @@ impl Config {
config_profile: config_profile_key,
codex_linux_sandbox_exe,
base_instructions,
include_plan_tool,
} = overrides;

let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
Expand Down Expand Up @@ -465,9 +470,14 @@ impl Config {

let experimental_resume = cfg.experimental_resume;

let base_instructions = base_instructions.or(Self::get_base_instructions(
// Load base instructions override from a file if specified. If the
// path is relative, resolve it against the effective cwd so the
// behaviour matches other path-like config values.
let file_base_instructions = Self::get_base_instructions(
cfg.experimental_instructions_file.as_ref(),
));
&resolved_cwd,
)?;
let base_instructions = base_instructions.or(file_base_instructions);

let config = Self {
model,
Expand Down Expand Up @@ -516,8 +526,8 @@ impl Config {
.chatgpt_base_url
.or(cfg.chatgpt_base_url)
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),

experimental_resume,
include_plan_tool: include_plan_tool.unwrap_or(false),
};
Ok(config)
}
Expand All @@ -539,13 +549,46 @@ impl Config {
})
}

fn get_base_instructions(path: Option<&PathBuf>) -> Option<String> {
let path = path.as_ref()?;
fn get_base_instructions(
path: Option<&PathBuf>,
cwd: &Path,
) -> std::io::Result<Option<String>> {
let p = match path.as_ref() {
None => return Ok(None),
Some(p) => p,
};

std::fs::read_to_string(path)
.ok()
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
// Resolve relative paths against the provided cwd to make CLI
// overrides consistent regardless of where the process was launched
// from.
let full_path = if p.is_relative() {
cwd.join(p)
} else {
p.to_path_buf()
};

let contents = std::fs::read_to_string(&full_path).map_err(|e| {
std::io::Error::new(
e.kind(),
format!(
"failed to read experimental instructions file {}: {e}",
full_path.display()
),
)
})?;

let s = contents.trim().to_string();
if s.is_empty() {
Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!(
"experimental instructions file is empty: {}",
full_path.display()
),
))
} else {
Ok(Some(s))
}
}
}

Expand Down Expand Up @@ -791,7 +834,7 @@ disable_response_storage = true
///
/// 1. custom command-line argument, e.g. `--model o3`
/// 2. as part of a profile, where the `--profile` is specified via a CLI
/// (or in the config file itelf)
/// (or in the config file itself)
/// 3. as an entry in `config.toml`, e.g. `model = "o3"`
/// 4. the default value for a required field defined in code, e.g.,
/// `crate::flags::OPENAI_DEFAULT_MODEL`
Expand Down Expand Up @@ -841,6 +884,7 @@ disable_response_storage = true
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
experimental_resume: None,
base_instructions: None,
include_plan_tool: false,
},
o3_profile_config
);
Expand Down Expand Up @@ -889,6 +933,7 @@ disable_response_storage = true
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
experimental_resume: None,
base_instructions: None,
include_plan_tool: false,
};

assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
Expand Down Expand Up @@ -952,6 +997,7 @@ disable_response_storage = true
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
experimental_resume: None,
base_instructions: None,
include_plan_tool: false,
};

assert_eq!(expected_zdr_profile_config, zdr_profile_config);
Expand Down
1 change: 1 addition & 0 deletions codex-rs/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ mod models;
pub mod openai_api_key;
mod openai_model_info;
mod openai_tools;
pub mod plan_tool;
mod project_doc;
pub mod protocol;
mod rollout;
Expand Down
18 changes: 13 additions & 5 deletions codex-rs/core/src/openai_tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ use std::collections::BTreeMap;
use std::sync::LazyLock;

use crate::client_common::Prompt;
use crate::plan_tool::PLAN_TOOL;

#[derive(Debug, Clone, Serialize)]
pub(crate) struct ResponsesApiTool {
name: &'static str,
description: &'static str,
strict: bool,
parameters: JsonSchema,
pub(crate) name: &'static str,
pub(crate) description: &'static str,
pub(crate) strict: bool,
pub(crate) parameters: JsonSchema,
}

/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
Expand Down Expand Up @@ -74,6 +75,7 @@ static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
pub(crate) fn create_tools_json_for_responses_api(
prompt: &Prompt,
model: &str,
include_plan_tool: bool,
) -> crate::error::Result<Vec<serde_json::Value>> {
// Assemble tool list: built-in tools + any extra tools from the prompt.
let default_tools = if model.starts_with("codex") {
Expand All @@ -93,6 +95,10 @@ pub(crate) fn create_tools_json_for_responses_api(
.map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
);

if include_plan_tool {
tools_json.push(serde_json::to_value(PLAN_TOOL.clone())?);
}

Ok(tools_json)
}

Expand All @@ -102,10 +108,12 @@ pub(crate) fn create_tools_json_for_responses_api(
pub(crate) fn create_tools_json_for_chat_completions_api(
prompt: &Prompt,
model: &str,
include_plan_tool: bool,
) -> crate::error::Result<Vec<serde_json::Value>> {
// We start with the JSON for the Responses API and than rewrite it to match
// the chat completions tool call format.
let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
let responses_api_tools_json =
create_tools_json_for_responses_api(prompt, model, include_plan_tool)?;
let tools_json = responses_api_tools_json
.into_iter()
.filter_map(|mut tool| {
Expand Down
126 changes: 126 additions & 0 deletions codex-rs/core/src/plan_tool.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
use std::collections::BTreeMap;
use std::sync::LazyLock;

use serde::Deserialize;
use serde::Serialize;

use crate::codex::Session;
use crate::models::FunctionCallOutputPayload;
use crate::models::ResponseInputItem;
use crate::openai_tools::JsonSchema;
use crate::openai_tools::OpenAiTool;
use crate::openai_tools::ResponsesApiTool;
use crate::protocol::Event;
use crate::protocol::EventMsg;

// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum StepStatus {
Pending,
InProgress,
Completed,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PlanItemArg {
pub step: String,
pub status: StepStatus,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct UpdatePlanArgs {
#[serde(default)]
pub explanation: Option<String>,
pub plan: Vec<PlanItemArg>,
}

pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| {
let mut plan_item_props = BTreeMap::new();
plan_item_props.insert("step".to_string(), JsonSchema::String);
plan_item_props.insert("status".to_string(), JsonSchema::String);

let plan_items_schema = JsonSchema::Array {
items: Box::new(JsonSchema::Object {
properties: plan_item_props,
required: &["step", "status"],
additional_properties: false,
}),
};

let mut properties = BTreeMap::new();
properties.insert("explanation".to_string(), JsonSchema::String);
properties.insert("plan".to_string(), plan_items_schema);

OpenAiTool::Function(ResponsesApiTool {
name: "update_plan",
description: r#"Use the update_plan tool to keep the user updated on the current plan for the task.
After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan:
1. Explore the codebase to find relevant files (status: in_progress)
2. Implement the feature in the XYZ component (status: pending)
3. Commit changes and make a pull request (status: pending)
Each step should be a short, 1-sentence description.
Until all the steps are finished, there should always be exactly one in_progress step in the plan.
Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
When all steps are completed, call update_plan one last time with all steps marked as `completed`."#,
strict: false,
parameters: JsonSchema::Object {
properties,
required: &["plan"],
additional_properties: false,
},
})
});

/// This function doesn't do anything useful. However, it gives the model a structured way to record its plan that clients can read and render.
/// So it's the _inputs_ to this function that are useful to clients, not the outputs and neither are actually useful for the model other
/// than forcing it to come up and document a plan (TBD how that affects performance).
pub(crate) async fn handle_update_plan(
session: &Session,
arguments: String,
sub_id: String,
call_id: String,
) -> ResponseInputItem {
match parse_update_plan_arguments(arguments, &call_id) {
Ok(args) => {
let output = ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: "Plan updated".to_string(),
success: Some(true),
},
};
session
.send_event(Event {
id: sub_id.to_string(),
msg: EventMsg::PlanUpdate(args),
})
.await;
output
}
Err(output) => *output,
}
}

fn parse_update_plan_arguments(
arguments: String,
call_id: &str,
) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
match serde_json::from_str::<UpdatePlanArgs>(&arguments) {
Ok(args) => Ok(args),
Err(e) => {
let output = ResponseInputItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {e}"),
success: None,
},
};
Err(Box::new(output))
}
}
}
3 changes: 3 additions & 0 deletions codex-rs/core/src/protocol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
use crate::message_history::HistoryEntry;
use crate::model_provider_info::ModelProviderInfo;
use crate::plan_tool::UpdatePlanArgs;

/// Submission Queue Entry - requests from user
#[derive(Debug, Clone, Deserialize, Serialize)]
Expand Down Expand Up @@ -335,6 +336,8 @@ pub enum EventMsg {
/// Response to GetHistoryEntryRequest.
GetHistoryEntryResponse(GetHistoryEntryResponseEvent),

PlanUpdate(UpdatePlanArgs),

/// Notification that the agent is shutting down.
ShutdownComplete,
}
Expand Down
Loading