codingwatching · pull · Jul 29, 2025 · Jul 29, 2025 · Jul 29, 2025 · Jul 29, 2025
diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs
@@ -30,6 +30,7 @@ use crate::util::backoff;
 pub(crate) async fn stream_chat_completions(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
     client: &reqwest::Client,
     provider: &ModelProviderInfo,
 ) -> Result<ResponseStream> {
@@ -105,7 +106,7 @@ pub(crate) async fn stream_chat_completions(
         }
     }
 
-    let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
+    let tools_json = create_tools_json_for_chat_completions_api(prompt, model, include_plan_tool)?;
     let payload = json!({
         "model": model,
         "messages": messages,

diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
@@ -77,6 +77,7 @@ impl ModelClient {
                 let response_stream = stream_chat_completions(
                     prompt,
                     &self.config.model,
+                    self.config.include_plan_tool,
                     &self.client,
                     &self.provider,
                 )
@@ -115,7 +116,11 @@ impl ModelClient {
         }
 
         let full_instructions = prompt.get_full_instructions(&self.config.model);
-        let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?;
+        let tools_json = create_tools_json_for_responses_api(
+            prompt,
+            &self.config.model,
+            self.config.include_plan_tool,
+        )?;
         let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary);
 
         // Request encrypted COT if we are not storing responses,

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
@@ -55,6 +55,7 @@ use crate::models::ReasoningItemReasoningSummary;
 use crate::models::ResponseInputItem;
 use crate::models::ResponseItem;
 use crate::models::ShellToolCallParams;
+use crate::plan_tool::handle_update_plan;
 use crate::project_doc::get_user_instructions;
 use crate::protocol::AgentMessageDeltaEvent;
 use crate::protocol::AgentMessageEvent;
@@ -1336,6 +1337,7 @@ async fn handle_function_call(
             };
             handle_container_exec_with_params(params, sess, sub_id, call_id).await
         }
+        "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
         _ => {
             match sess.mcp_connection_manager.parse_tool_name(&name) {
                 Some((server, tool_name)) => {

diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs
@@ -143,6 +143,9 @@ pub struct Config {
 
     /// Experimental rollout resume path (absolute path to .jsonl; undocumented).
     pub experimental_resume: Option<PathBuf>,
+
+    /// Include an experimental plan tool that the model can use to update its current plan and the status of each step.
+    pub include_plan_tool: bool,
 }
 
 impl Config {
@@ -366,6 +369,7 @@ pub struct ConfigOverrides {
     pub config_profile: Option<String>,
     pub codex_linux_sandbox_exe: Option<PathBuf>,
     pub base_instructions: Option<String>,
+    pub include_plan_tool: Option<bool>,
 }
 
 impl Config {
@@ -388,6 +392,7 @@ impl Config {
             config_profile: config_profile_key,
             codex_linux_sandbox_exe,
             base_instructions,
+            include_plan_tool,
         } = overrides;
 
         let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
@@ -465,9 +470,14 @@ impl Config {
 
         let experimental_resume = cfg.experimental_resume;
 
-        let base_instructions = base_instructions.or(Self::get_base_instructions(
+        // Load base instructions override from a file if specified. If the
+        // path is relative, resolve it against the effective cwd so the
+        // behaviour matches other path-like config values.
+        let file_base_instructions = Self::get_base_instructions(
             cfg.experimental_instructions_file.as_ref(),
-        ));
+            &resolved_cwd,
+        )?;
+        let base_instructions = base_instructions.or(file_base_instructions);
 
         let config = Self {
             model,
@@ -516,8 +526,8 @@ impl Config {
                 .chatgpt_base_url
                 .or(cfg.chatgpt_base_url)
                 .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
-
             experimental_resume,
+            include_plan_tool: include_plan_tool.unwrap_or(false),
         };
         Ok(config)
     }
@@ -539,13 +549,46 @@ impl Config {
         })
     }
 
-    fn get_base_instructions(path: Option<&PathBuf>) -> Option<String> {
-        let path = path.as_ref()?;
+    /// Reads the base-instructions override from `path`, resolving a relative `path` against
+    /// `cwd`. Returns `Ok(None)` when no path is configured, and an error when the file cannot be
+    /// read or contains only whitespace.
+    fn get_base_instructions(
+        path: Option<&PathBuf>,
+        cwd: &Path,
+    ) -> std::io::Result<Option<String>> {
+        let Some(configured) = path else {
+            return Ok(None);
+        };
 
-        std::fs::read_to_string(path)
-            .ok()
-            .map(|s| s.trim().to_string())
-            .filter(|s| !s.is_empty())
+        // Relative paths are anchored at `cwd` so that CLI overrides behave the same regardless
+        // of where the process was launched from.
+        let resolved = if configured.is_absolute() {
+            configured.to_path_buf()
+        } else {
+            cwd.join(configured)
+        };
+
+        let raw = std::fs::read_to_string(&resolved).map_err(|e| {
+            std::io::Error::new(
+                e.kind(),
+                format!(
+                    "failed to read experimental instructions file {}: {e}",
+                    resolved.display()
+                ),
+            )
+        })?;
+
+        let trimmed = raw.trim();
+        if trimmed.is_empty() {
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!(
+                    "experimental instructions file is empty: {}",
+                    resolved.display()
+                ),
+            ));
+        }
+        Ok(Some(trimmed.to_string()))
     }
 }
 
@@ -791,7 +834,7 @@ disable_response_storage = true
     ///
     /// 1. custom command-line argument, e.g. `--model o3`
     /// 2. as part of a profile, where the `--profile` is specified via a CLI
-    ///    (or in the config file itelf)
+    ///    (or in the config file itself)
     /// 3. as an entry in `config.toml`, e.g. `model = "o3"`
     /// 4. the default value for a required field defined in code, e.g.,
     ///    `crate::flags::OPENAI_DEFAULT_MODEL`
@@ -841,6 +884,7 @@ disable_response_storage = true
                 chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
                 experimental_resume: None,
                 base_instructions: None,
+                include_plan_tool: false,
             },
             o3_profile_config
         );
@@ -889,6 +933,7 @@ disable_response_storage = true
             chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
             experimental_resume: None,
             base_instructions: None,
+            include_plan_tool: false,
         };
 
         assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -952,6 +997,7 @@ disable_response_storage = true
             chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
             experimental_resume: None,
             base_instructions: None,
+            include_plan_tool: false,
         };
 
         assert_eq!(expected_zdr_profile_config, zdr_profile_config);

diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
@@ -34,6 +34,7 @@ mod models;
 pub mod openai_api_key;
 mod openai_model_info;
 mod openai_tools;
+pub mod plan_tool;
 mod project_doc;
 pub mod protocol;
 mod rollout;

diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs
@@ -4,13 +4,14 @@ use std::collections::BTreeMap;
 use std::sync::LazyLock;
 
 use crate::client_common::Prompt;
+use crate::plan_tool::PLAN_TOOL;
 
 #[derive(Debug, Clone, Serialize)]
 pub(crate) struct ResponsesApiTool {
-    name: &'static str,
-    description: &'static str,
-    strict: bool,
-    parameters: JsonSchema,
+    pub(crate) name: &'static str, // function name the model calls, e.g. "update_plan"
+    pub(crate) description: &'static str, // usage guidance addressed to the model
+    pub(crate) strict: bool, // NOTE(review): presumably the API's strict-schema flag; confirm
+    pub(crate) parameters: JsonSchema, // schema describing the call's arguments
 }
 
 /// When serialized as JSON, this produces a valid "Tool" in the OpenAI
@@ -74,6 +75,7 @@ static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
 pub(crate) fn create_tools_json_for_responses_api(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
     // Assemble tool list: built-in tools + any extra tools from the prompt.
     let default_tools = if model.starts_with("codex") {
@@ -93,6 +95,10 @@ pub(crate) fn create_tools_json_for_responses_api(
             .map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
     );
 
+    if include_plan_tool {
+        tools_json.push(serde_json::to_value(PLAN_TOOL.clone())?);
+    }
+
     Ok(tools_json)
 }
 
@@ -102,10 +108,12 @@ pub(crate) fn create_tools_json_for_responses_api(
 pub(crate) fn create_tools_json_for_chat_completions_api(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
     // We start with the JSON for the Responses API and than rewrite it to match
     // the chat completions tool call format.
-    let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
+    let responses_api_tools_json =
+        create_tools_json_for_responses_api(prompt, model, include_plan_tool)?;
     let tools_json = responses_api_tools_json
         .into_iter()
         .filter_map(|mut tool| {

diff --git a/codex-rs/core/src/plan_tool.rs b/codex-rs/core/src/plan_tool.rs
@@ -0,0 +1,126 @@
+use std::collections::BTreeMap;
+use std::sync::LazyLock;
+
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::codex::Session;
+use crate::models::FunctionCallOutputPayload;
+use crate::models::ResponseInputItem;
+use crate::openai_tools::JsonSchema;
+use crate::openai_tools::OpenAiTool;
+use crate::openai_tools::ResponsesApiTool;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+
+// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")] // wire values: "pending", "in_progress", "completed"
+pub enum StepStatus {
+    Pending, // listed in the plan but not being worked on yet
+    InProgress, // the step currently being worked on
+    Completed, // the step has been finished
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)] // reject plan items carrying any key other than `step`/`status`
+pub struct PlanItemArg {
+    pub step: String, // description of the step (the tool prompt asks for one short sentence)
+    pub status: StepStatus, // progress of this step
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)] // reject payloads with keys other than `explanation`/`plan`
+pub struct UpdatePlanArgs {
+    #[serde(default)] // optional: absent in the JSON means `None`
+    pub explanation: Option<String>, // rationale; the tool prompt asks for it when plans change
+    pub plan: Vec<PlanItemArg>, // the plan's steps with their current statuses
+}
+
+/// The `update_plan` function tool, built once on first use and offered to the model when the
+/// plan tool is enabled. Its `plan` argument is a required array of `{step, status}` objects.
+pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| {
+    let entry_schema = JsonSchema::Object {
+        properties: BTreeMap::from([
+            ("step".to_string(), JsonSchema::String),
+            ("status".to_string(), JsonSchema::String),
+        ]),
+        required: &["step", "status"],
+        additional_properties: false,
+    };
+    let items = Box::new(entry_schema);
+    let properties = BTreeMap::from([
+        ("explanation".to_string(), JsonSchema::String),
+        ("plan".to_string(), JsonSchema::Array { items }),
+    ]);
+
+    OpenAiTool::Function(ResponsesApiTool {
+        name: "update_plan",
+        description: r#"Use the update_plan tool to keep the user updated on the current plan for the task.
+After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan:
+1. Explore the codebase to find relevant files (status: in_progress)
+2. Implement the feature in the XYZ component (status: pending)
+3. Commit changes and make a pull request (status: pending)
+Each step should be a short, 1-sentence description.
+Until all the steps are finished, there should always be exactly one in_progress step in the plan.
+Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`.
+Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
+Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
+When all steps are completed, call update_plan one last time with all steps marked as `completed`."#,
+        strict: false,
+        parameters: JsonSchema::Object {
+            properties,
+            required: &["plan"],
+            additional_properties: false,
+        },
+    })
+});
+
+/// Handles an `update_plan` tool call. The call does no work for the model; it exists so the
+/// model records its plan in a structured form that clients can read and render (TBD how
+/// forcing it to write down a plan affects performance). On success the parsed plan is sent as
+/// an `EventMsg::PlanUpdate` event and "Plan updated" is returned; else a parse-error output.
+pub(crate) async fn handle_update_plan(
+    session: &Session,
+    arguments: String,
+    sub_id: String,
+    call_id: String,
+) -> ResponseInputItem {
+    let args = match parse_update_plan_arguments(arguments, &call_id) {
+        Ok(args) => args,
+        Err(failure) => return *failure,
+    };
+    // `sub_id` is owned and not used afterwards, so move it instead of cloning it.
+    session
+        .send_event(Event {
+            id: sub_id,
+            msg: EventMsg::PlanUpdate(args),
+        })
+        .await;
+    ResponseInputItem::FunctionCallOutput {
+        call_id,
+        output: FunctionCallOutputPayload {
+            content: "Plan updated".to_string(),
+            success: Some(true),
+        },
+    }
+}
+
+/// Deserializes the JSON `arguments` of an `update_plan` call into `UpdatePlanArgs`.
+///
+/// On failure, returns a boxed `FunctionCallOutput` for `call_id` whose content carries the
+/// serde error message and whose `success` is `None`.
+fn parse_update_plan_arguments(
+    arguments: String,
+    call_id: &str,
+) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
+    serde_json::from_str::<UpdatePlanArgs>(&arguments).map_err(|e| {
+        Box::new(ResponseInputItem::FunctionCallOutput {
+            call_id: call_id.to_string(),
+            output: FunctionCallOutputPayload {
+                content: format!("failed to parse function arguments: {e}"),
+                success: None,
+            },
+        })
+    })
+}
diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
@@ -19,6 +19,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::message_history::HistoryEntry;
 use crate::model_provider_info::ModelProviderInfo;
+use crate::plan_tool::UpdatePlanArgs;
 
 /// Submission Queue Entry - requests from user
 #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -335,6 +336,8 @@ pub enum EventMsg {
     /// Response to GetHistoryEntryRequest.
     GetHistoryEntryResponse(GetHistoryEntryResponseEvent),
 
+    PlanUpdate(UpdatePlanArgs),
+
     /// Notification that the agent is shutting down.
     ShutdownComplete,
 }