From 29336949cdcd83e8c1cf4fcf0100017046c0fa0d Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 25 May 2026 16:15:56 -0400 Subject: [PATCH 1/6] Add post-tool-use failure hook Expose postToolUseFailure hooks across SDKs, wire runtime hook dispatch, update docs, and add cross-language E2E coverage for failed tool results. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/agents/docs-maintenance.agent.md | 8 +- docs/features/hooks.md | 602 +++++++++--------- docs/hooks/hooks-overview.md | 3 +- docs/hooks/post-tool-use.md | 97 ++- docs/troubleshooting/compatibility.md | 3 +- dotnet/README.md | 16 +- dotnet/src/Client.cs | 2 + dotnet/src/Session.cs | 7 + dotnet/src/Types.cs | 69 ++ .../E2E/HookLifecycleAndOutputE2ETests.cs | 62 +- go/README.md | 16 +- go/client.go | 2 + go/internal/e2e/hooks_extended_e2e_test.go | 84 ++- go/session.go | 10 + go/session_test.go | 72 ++- go/types.go | 50 ++ nodejs/README.md | 15 +- nodejs/docs/agent-author.md | 38 +- nodejs/docs/examples.md | 33 +- nodejs/src/session.ts | 5 +- nodejs/src/types.ts | 59 +- nodejs/test/client.test.ts | 173 +++++ nodejs/test/e2e/hooks_extended.e2e.test.ts | 35 + python/README.md | 13 +- python/copilot/__init__.py | 6 + python/copilot/session.py | 34 + python/e2e/test_hooks_extended_e2e.py | 48 +- python/test_client.py | 69 ++ rust/README.md | 2 +- rust/src/hooks.rs | 166 +++++ rust/tests/e2e/hooks_extended.rs | 84 ++- ...sefailure_hook_for_failed_tool_result.yaml | 24 + 32 files changed, 1524 insertions(+), 383 deletions(-) create mode 100644 test/snapshots/hooks_extended/should_invoke_posttoolusefailure_hook_for_failed_tool_result.yaml diff --git a/.github/agents/docs-maintenance.agent.md b/.github/agents/docs-maintenance.agent.md index c5363e369..2a6685de2 100644 --- a/.github/agents/docs-maintenance.agent.md +++ b/.github/agents/docs-maintenance.agent.md @@ -344,7 +344,7 @@ cat nodejs/src/types.ts | grep -A 10 "export interface ExportSessionOptions" - `CopilotClient` 
constructor options: `cliPath`, `cliUrl`, `useStdio`, `port`, `logLevel`, `autoStart`, `env`, `githubToken`, `useLoggedInUser` - `createSession()` config: `model`, `tools`, `hooks`, `systemMessage`, `mcpServers`, `availableTools`, `excludedTools`, `streaming`, `reasoningEffort`, `provider`, `infiniteSessions`, `customAgents`, `workingDirectory` - `CopilotSession` methods: `send()`, `sendAndWait()`, `getMessages()`, `disconnect()`, `abort()`, `on()`, `once()`, `off()` -- Hook names: `onPreToolUse`, `onPostToolUse`, `onUserPromptSubmitted`, `onSessionStart`, `onSessionEnd`, `onErrorOccurred` +- Hook names: `onPreToolUse`, `onPostToolUse`, `onPostToolUseFailure`, `onUserPromptSubmitted`, `onSessionStart`, `onSessionEnd`, `onErrorOccurred` #### Python Validation @@ -362,7 +362,7 @@ cat python/copilot/types.py | grep -A 15 "class SessionHooks" - `CopilotClient` options: `cli_path`, `cli_url`, `use_stdio`, `port`, `log_level`, `auto_start`, `env`, `github_token`, `use_logged_in_user` - `create_session()` config keys: `model`, `tools`, `hooks`, `system_message`, `mcp_servers`, `available_tools`, `excluded_tools`, `streaming`, `reasoning_effort`, `provider`, `infinite_sessions`, `custom_agents`, `working_directory` - `CopilotSession` methods: `send()`, `send_and_wait()`, `get_messages()`, `disconnect()`, `abort()`, `export_session()` -- Hook names: `on_pre_tool_use`, `on_post_tool_use`, `on_user_prompt_submitted`, `on_session_start`, `on_session_end`, `on_error_occurred` +- Hook names: `on_pre_tool_use`, `on_post_tool_use`, `on_post_tool_use_failure`, `on_user_prompt_submitted`, `on_session_start`, `on_session_end`, `on_error_occurred` #### Go Validation @@ -380,7 +380,7 @@ cat go/types.go | grep -A 15 "type SessionHooks struct" - `ClientOptions` fields: `CLIPath`, `CLIUrl`, `UseStdio`, `Port`, `LogLevel`, `AutoStart`, `Env`, `GithubToken`, `UseLoggedInUser` - `SessionConfig` fields: `Model`, `Tools`, `Hooks`, `SystemMessage`, `MCPServers`, `AvailableTools`, 
`ExcludedTools`, `Streaming`, `ReasoningEffort`, `Provider`, `InfiniteSessions`, `CustomAgents`, `WorkingDirectory` - `Session` methods: `Send()`, `SendAndWait()`, `GetMessages()`, `Disconnect()`, `Abort()`, `ExportSession()` -- Hook fields: `OnPreToolUse`, `OnPostToolUse`, `OnUserPromptSubmitted`, `OnSessionStart`, `OnSessionEnd`, `OnErrorOccurred` +- Hook fields: `OnPreToolUse`, `OnPostToolUse`, `OnPostToolUseFailure`, `OnUserPromptSubmitted`, `OnSessionStart`, `OnSessionEnd`, `OnErrorOccurred` #### .NET Validation @@ -398,7 +398,7 @@ cat dotnet/src/Types.cs | grep -A 15 "public class SessionHooks" - `CopilotClientOptions` properties: `CliPath`, `CliUrl`, `UseStdio`, `Port`, `LogLevel`, `AutoStart`, `Environment`, `GithubToken`, `UseLoggedInUser` - `SessionConfig` properties: `Model`, `Tools`, `Hooks`, `SystemMessage`, `McpServers`, `AvailableTools`, `ExcludedTools`, `Streaming`, `ReasoningEffort`, `Provider`, `InfiniteSessions`, `CustomAgents`, `WorkingDirectory` - `CopilotSession` methods: `SendAsync()`, `SendAndWaitAsync()`, `GetMessagesAsync()`, `DisposeAsync()`, `AbortAsync()`, `ExportSessionAsync()` -- Hook properties: `OnPreToolUse`, `OnPostToolUse`, `OnUserPromptSubmitted`, `OnSessionStart`, `OnSessionEnd`, `OnErrorOccurred` +- Hook properties: `OnPreToolUse`, `OnPostToolUse`, `OnPostToolUseFailure`, `OnUserPromptSubmitted`, `OnSessionStart`, `OnSessionEnd`, `OnErrorOccurred` #### Common Sample Errors to Check diff --git a/docs/features/hooks.md b/docs/features/hooks.md index bd55797dd..b348488a0 100644 --- a/docs/features/hooks.md +++ b/docs/features/hooks.md @@ -19,14 +19,15 @@ flowchart LR D -.->|error| H ``` -| Hook | When it fires | What you can do | -|------|---------------|-----------------| -| [`onSessionStart`](../hooks/session-lifecycle.md#session-start) | Session begins (new or resumed) | Inject context, load preferences | -| [`onUserPromptSubmitted`](../hooks/user-prompt-submitted.md) | User sends a message | Rewrite prompts, add context, 
filter input | -| [`onPreToolUse`](../hooks/pre-tool-use.md) | Before a tool executes | Allow / deny / modify the call | -| [`onPostToolUse`](../hooks/post-tool-use.md) | After a tool returns | Transform results, redact secrets, audit | -| [`onSessionEnd`](../hooks/session-lifecycle.md#session-end) | Session ends | Clean up, record metrics | -| [`onErrorOccurred`](../hooks/error-handling.md) | An error is raised | Custom logging, retry logic, alerts | +| Hook | When it fires | What you can do | +| ------------------------------------------------------------------- | ----------------------------------- | ------------------------------------------ | +| [`onSessionStart`](../hooks/session-lifecycle.md#session-start) | Session begins (new or resumed) | Inject context, load preferences | +| [`onUserPromptSubmitted`](../hooks/user-prompt-submitted.md) | User sends a message | Rewrite prompts, add context, filter input | +| [`onPreToolUse`](../hooks/pre-tool-use.md) | Before a tool executes | Allow / deny / modify the call | +| [`onPostToolUse`](../hooks/post-tool-use.md) | After a tool returns (success only) | Transform results, redact secrets, audit | +| [`onPostToolUseFailure`](../hooks/post-tool-use.md#failure-variant) | After a tool returns a failure | Inject retry guidance, log failures | +| [`onSessionEnd`](../hooks/session-lifecycle.md#session-end) | Session ends | Clean up, record metrics | +| [`onErrorOccurred`](../hooks/error-handling.md) | An error is raised | Custom logging, retry logic, alerts | All hooks are **optional**—register only the ones you need. Returning `null` (or the language equivalent) from any hook tells the SDK to continue with default behavior. @@ -44,13 +45,19 @@ const client = new CopilotClient(); await client.start(); const session = await client.createSession({ - hooks: { - onSessionStart: async (input, invocation) => { /* ... */ }, - onPreToolUse: async (input, invocation) => { /* ... 
*/ }, - onPostToolUse: async (input, invocation) => { /* ... */ }, - // ... add only the hooks you need + hooks: { + onSessionStart: async (input, invocation) => { + /* ... */ + }, + onPreToolUse: async (input, invocation) => { + /* ... */ + }, + onPostToolUse: async (input, invocation) => { + /* ... */ }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + // ... add only the hooks you need + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -82,6 +89,7 @@ session = await client.create_session( Go + ```go package main @@ -121,6 +129,7 @@ func main() { _ = err } ``` + ```go @@ -145,6 +154,7 @@ session, err := client.CreateSession(ctx, &copilot.SessionConfig{ .NET + ```csharp using GitHub.Copilot; using GitHub.Copilot.Rpc; @@ -176,6 +186,7 @@ public static class HooksExample } } ``` + ```csharp @@ -241,19 +252,18 @@ Use `onPreToolUse` to build a permission layer that decides which tools the agen const READ_ONLY_TOOLS = ["read_file", "glob", "grep", "view"]; const session = await client.createSession({ - hooks: { - onPreToolUse: async (input) => { - if (!READ_ONLY_TOOLS.includes(input.toolName)) { - return { - permissionDecision: "deny", - permissionDecisionReason: - `Only read-only tools are allowed. "${input.toolName}" was blocked.`, - }; - } - return { permissionDecision: "allow" }; - }, + hooks: { + onPreToolUse: async (input) => { + if (!READ_ONLY_TOOLS.includes(input.toolName)) { + return { + permissionDecision: "deny", + permissionDecisionReason: `Only read-only tools are allowed. 
"${input.toolName}" was blocked.`, + }; + } + return { permissionDecision: "allow" }; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -288,6 +298,7 @@ session = await client.create_session( Go + ```go package main @@ -323,6 +334,7 @@ func main() { _ = session } ``` + ```go @@ -349,6 +361,7 @@ session, _ := client.CreateSession(ctx, &copilot.SessionConfig{ .NET + ```csharp using GitHub.Copilot; using GitHub.Copilot.Rpc; @@ -385,6 +398,7 @@ public static class PermissionControlExample } } ``` + ```csharp @@ -452,24 +466,23 @@ var session = client.createSession( const ALLOWED_DIRS = ["/home/user/projects", "/tmp"]; const session = await client.createSession({ - hooks: { - onPreToolUse: async (input) => { - if (["read_file", "write_file", "edit"].includes(input.toolName)) { - const filePath = (input.toolArgs as { path: string }).path; - const allowed = ALLOWED_DIRS.some((dir) => filePath.startsWith(dir)); - - if (!allowed) { - return { - permissionDecision: "deny", - permissionDecisionReason: - `Access to "${filePath}" is outside the allowed directories.`, - }; - } - } - return { permissionDecision: "allow" }; - }, + hooks: { + onPreToolUse: async (input) => { + if (["read_file", "write_file", "edit"].includes(input.toolName)) { + const filePath = (input.toolArgs as { path: string }).path; + const allowed = ALLOWED_DIRS.some((dir) => filePath.startsWith(dir)); + + if (!allowed) { + return { + permissionDecision: "deny", + permissionDecisionReason: `Access to "${filePath}" is outside the allowed directories.`, + }; + } + } + return { permissionDecision: "allow" }; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -479,15 +492,15 @@ const session = await client.createSession({ const DESTRUCTIVE_TOOLS = ["delete_file", "shell", "bash"]; const session = await client.createSession({ - 
hooks: { - onPreToolUse: async (input) => { - if (DESTRUCTIVE_TOOLS.includes(input.toolName)) { - return { permissionDecision: "ask" }; - } - return { permissionDecision: "allow" }; - }, + hooks: { + onPreToolUse: async (input) => { + if (DESTRUCTIVE_TOOLS.includes(input.toolName)) { + return { permissionDecision: "ask" }; + } + return { permissionDecision: "allow" }; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -504,72 +517,72 @@ Combine `onPreToolUse`, `onPostToolUse`, and the session lifecycle hooks to buil ```typescript interface AuditEntry { - timestamp: number; - sessionId: string; - event: string; - toolName?: string; - toolArgs?: unknown; - toolResult?: unknown; - prompt?: string; + timestamp: Date; + sessionId: string; + event: string; + toolName?: string; + toolArgs?: unknown; + toolResult?: unknown; + prompt?: string; } const auditLog: AuditEntry[] = []; const session = await client.createSession({ - hooks: { - onSessionStart: async (input, invocation) => { - auditLog.push({ - timestamp: input.timestamp, - sessionId: invocation.sessionId, - event: "session_start", - }); - return null; - }, - onUserPromptSubmitted: async (input, invocation) => { - auditLog.push({ - timestamp: input.timestamp, - sessionId: invocation.sessionId, - event: "user_prompt", - prompt: input.prompt, - }); - return null; - }, - onPreToolUse: async (input, invocation) => { - auditLog.push({ - timestamp: input.timestamp, - sessionId: invocation.sessionId, - event: "tool_call", - toolName: input.toolName, - toolArgs: input.toolArgs, - }); - return { permissionDecision: "allow" }; - }, - onPostToolUse: async (input, invocation) => { - auditLog.push({ - timestamp: input.timestamp, - sessionId: invocation.sessionId, - event: "tool_result", - toolName: input.toolName, - toolResult: input.toolResult, - }); - return null; - }, - onSessionEnd: async (input, invocation) => { - auditLog.push({ - 
timestamp: input.timestamp, - sessionId: invocation.sessionId, - event: "session_end", - }); - - // Persist the log — swap this with your own storage backend - await fs.promises.writeFile( - `audit-${invocation.sessionId}.json`, - JSON.stringify(auditLog, null, 2), - ); - return null; - }, + hooks: { + onSessionStart: async (input, invocation) => { + auditLog.push({ + timestamp: input.timestamp, + sessionId: invocation.sessionId, + event: "session_start", + }); + return null; + }, + onUserPromptSubmitted: async (input, invocation) => { + auditLog.push({ + timestamp: input.timestamp, + sessionId: invocation.sessionId, + event: "user_prompt", + prompt: input.prompt, + }); + return null; + }, + onPreToolUse: async (input, invocation) => { + auditLog.push({ + timestamp: input.timestamp, + sessionId: invocation.sessionId, + event: "tool_call", + toolName: input.toolName, + toolArgs: input.toolArgs, + }); + return { permissionDecision: "allow" }; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + onPostToolUse: async (input, invocation) => { + auditLog.push({ + timestamp: input.timestamp, + sessionId: invocation.sessionId, + event: "tool_result", + toolName: input.toolName, + toolResult: input.toolResult, + }); + return null; + }, + onSessionEnd: async (input, invocation) => { + auditLog.push({ + timestamp: input.timestamp, + sessionId: invocation.sessionId, + event: "session_end", + }); + + // Persist the log — swap this with your own storage backend + await fs.promises.writeFile( + `audit-${invocation.sessionId}.json`, + JSON.stringify(auditLog, null, 2), + ); + return null; + }, + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -579,6 +592,7 @@ const session = await client.createSession({ Python + ```python import json, aiofiles from copilot import PermissionDecisionApproveOnce @@ -587,7 +601,7 @@ audit_log = [] async def on_session_start(input_data, invocation): audit_log.append({ - "timestamp": input_data["timestamp"], + 
"timestamp": input_data["timestamp"].isoformat(), "session_id": invocation["session_id"], "event": "session_start", }) @@ -595,7 +609,7 @@ async def on_session_start(input_data, invocation): async def on_user_prompt_submitted(input_data, invocation): audit_log.append({ - "timestamp": input_data["timestamp"], + "timestamp": input_data["timestamp"].isoformat(), "session_id": invocation["session_id"], "event": "user_prompt", "prompt": input_data["prompt"], @@ -604,7 +618,7 @@ async def on_user_prompt_submitted(input_data, invocation): async def on_pre_tool_use(input_data, invocation): audit_log.append({ - "timestamp": input_data["timestamp"], + "timestamp": input_data["timestamp"].isoformat(), "session_id": invocation["session_id"], "event": "tool_call", "tool_name": input_data["toolName"], @@ -614,7 +628,7 @@ async def on_pre_tool_use(input_data, invocation): async def on_post_tool_use(input_data, invocation): audit_log.append({ - "timestamp": input_data["timestamp"], + "timestamp": input_data["timestamp"].isoformat(), "session_id": invocation["session_id"], "event": "tool_result", "tool_name": input_data["toolName"], @@ -624,7 +638,7 @@ async def on_post_tool_use(input_data, invocation): async def on_session_end(input_data, invocation): audit_log.append({ - "timestamp": input_data["timestamp"], + "timestamp": input_data["timestamp"].isoformat(), "session_id": invocation["session_id"], "event": "session_end", }) @@ -650,25 +664,25 @@ session = await client.create_session( ```typescript const SECRET_PATTERNS = [ - /(?:api[_-]?key|token|secret|password)\s*[:=]\s*["']?[\w\-\.]+["']?/gi, + /(?:api[_-]?key|token|secret|password)\s*[:=]\s*["']?[\w\-\.]+["']?/gi, ]; const session = await client.createSession({ - hooks: { - onPostToolUse: async (input) => { - if (typeof input.toolResult !== "string") return null; - - let redacted = input.toolResult; - for (const pattern of SECRET_PATTERNS) { - redacted = redacted.replace(pattern, "[REDACTED]"); - } - - return redacted !== 
input.toolResult - ? { modifiedResult: redacted } - : null; - }, + hooks: { + onPostToolUse: async (input) => { + if (typeof input.toolResult !== "string") return null; + + let redacted = input.toolResult; + for (const pattern of SECRET_PATTERNS) { + redacted = redacted.replace(pattern, "[REDACTED]"); + } + + return redacted !== input.toolResult + ? { modifiedResult: redacted } + : null; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -685,23 +699,23 @@ Hooks fire in your application's process, so you can trigger any side-effect—d import notifier from "node-notifier"; // npm install node-notifier const session = await client.createSession({ - hooks: { - onSessionEnd: async (input, invocation) => { - notifier.notify({ - title: "Copilot Session Complete", - message: `Session ${invocation.sessionId.slice(0, 8)} finished (${input.reason}).`, - }); - return null; - }, - onErrorOccurred: async (input) => { - notifier.notify({ - title: "Copilot Error", - message: input.error.slice(0, 200), - }); - return null; - }, + hooks: { + onSessionEnd: async (input, invocation) => { + notifier.notify({ + title: "Copilot Session Complete", + message: `Session ${invocation.sessionId.slice(0, 8)} finished (${input.reason}).`, + }); + return null; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + onErrorOccurred: async (input) => { + notifier.notify({ + title: "Copilot Error", + message: input.error.slice(0, 200), + }); + return null; + }, + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -747,18 +761,18 @@ session = await client.create_session( import { exec } from "node:child_process"; const session = await client.createSession({ - hooks: { - onPostToolUse: async (input) => { - // macOS: play a system sound after every tool call - exec("afplay /System/Library/Sounds/Pop.aiff"); - return null; - }, - onErrorOccurred: async () => { - 
exec("afplay /System/Library/Sounds/Basso.aiff"); - return null; - }, + hooks: { + onPostToolUse: async (input) => { + // macOS: play a system sound after every tool call + exec("afplay /System/Library/Sounds/Pop.aiff"); + return null; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + onErrorOccurred: async () => { + exec("afplay /System/Library/Sounds/Basso.aiff"); + return null; + }, + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -768,21 +782,21 @@ const session = await client.createSession({ const SLACK_WEBHOOK_URL = process.env.SLACK_WEBHOOK_URL!; const session = await client.createSession({ - hooks: { - onErrorOccurred: async (input, invocation) => { - if (!input.recoverable) { - await fetch(SLACK_WEBHOOK_URL, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - text: `🚨 Unrecoverable error in session \`${invocation.sessionId.slice(0, 8)}\`:\n\`\`\`${input.error}\`\`\``, - }), - }); - } - return null; - }, + hooks: { + onErrorOccurred: async (input, invocation) => { + if (!input.recoverable) { + await fetch(SLACK_WEBHOOK_URL, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + text: `🚨 Unrecoverable error in session \`${invocation.sessionId.slice(0, 8)}\`:\n\`\`\`${input.error}\`\`\``, + }), + }); + } + return null; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -794,21 +808,21 @@ Use `onSessionStart` and `onUserPromptSubmitted` to automatically inject context ```typescript const session = await client.createSession({ - hooks: { - onSessionStart: async (input) => { - const pkg = JSON.parse( - await fs.promises.readFile("package.json", "utf-8"), - ); - return { - additionalContext: [ - `Project: ${pkg.name} v${pkg.version}`, - `Node: ${process.version}`, - `CWD: ${input.cwd}`, - ].join("\n"), - }; - }, + hooks: { + 
onSessionStart: async (input) => { + const pkg = JSON.parse( + await fs.promises.readFile("package.json", "utf-8"), + ); + return { + additionalContext: [ + `Project: ${pkg.name} v${pkg.version}`, + `Node: ${process.version}`, + `Working directory: ${input.workingDirectory}`, + ].join("\n"), + }; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -816,25 +830,25 @@ const session = await client.createSession({ ```typescript const SHORTCUTS: Record<string, string> = { - "/fix": "Find and fix all errors in the current file", - "/test": "Write comprehensive unit tests for this code", - "/explain": "Explain this code in detail", - "/refactor": "Refactor this code to improve readability", + "/fix": "Find and fix all errors in the current file", + "/test": "Write comprehensive unit tests for this code", + "/explain": "Explain this code in detail", + "/refactor": "Refactor this code to improve readability", }; const session = await client.createSession({ - hooks: { - onUserPromptSubmitted: async (input) => { - for (const [shortcut, expansion] of Object.entries(SHORTCUTS)) { - if (input.prompt.startsWith(shortcut)) { - const rest = input.prompt.slice(shortcut.length).trim(); - return { modifiedPrompt: rest ? `${expansion}: ${rest}` : expansion }; - } - } - return null; - }, + hooks: { + onUserPromptSubmitted: async (input) => { + for (const [shortcut, expansion] of Object.entries(SHORTCUTS)) { + if (input.prompt.startsWith(shortcut)) { + const rest = input.prompt.slice(shortcut.length).trim(); + return { modifiedPrompt: rest ?
`${expansion}: ${rest}` : expansion }; + } + } + return null; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -846,19 +860,19 @@ The `onErrorOccurred` hook gives you a chance to react to failures—whether tha ```typescript const session = await client.createSession({ - hooks: { - onErrorOccurred: async (input) => { - if (input.errorContext === "model_call" && input.recoverable) { - return { - errorHandling: "retry", - retryCount: 3, - userNotification: "Temporary model issue — retrying…", - }; - } - return null; - }, + hooks: { + onErrorOccurred: async (input) => { + if (input.errorContext === "model_call" && input.recoverable) { + return { + errorHandling: "retry", + retryCount: 3, + userNotification: "Temporary model issue — retrying…", + }; + } + return null; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -866,20 +880,20 @@ const session = await client.createSession({ ```typescript const FRIENDLY_MESSAGES: Record<string, string> = { - model_call: "The AI model is temporarily unavailable. Please try again.", - tool_execution: "A tool encountered an error. Check inputs and try again.", - system: "A system error occurred. Please try again later.", + model_call: "The AI model is temporarily unavailable. Please try again.", + tool_execution: "A tool encountered an error. Check inputs and try again.", + system: "A system error occurred. Please try again later.", }; const session = await client.createSession({ - hooks: { - onErrorOccurred: async (input) => { - return { - userNotification: FRIENDLY_MESSAGES[input.errorContext] ?? input.error, - }; - }, + hooks: { + onErrorOccurred: async (input) => { + return { + userNotification: FRIENDLY_MESSAGES[input.errorContext] ??
input.error, + }; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -891,41 +905,45 @@ Track how long sessions run, how many tools are invoked, and why sessions end— Node.js / TypeScript ```typescript -const metrics = new Map(); +const metrics = new Map< + string, + { start: Date; toolCalls: number; prompts: number } +>(); const session = await client.createSession({ - hooks: { - onSessionStart: async (input, invocation) => { - metrics.set(invocation.sessionId, { - start: input.timestamp, - toolCalls: 0, - prompts: 0, - }); - return null; - }, - onUserPromptSubmitted: async (_input, invocation) => { - metrics.get(invocation.sessionId)!.prompts++; - return null; - }, - onPreToolUse: async (_input, invocation) => { - metrics.get(invocation.sessionId)!.toolCalls++; - return { permissionDecision: "allow" }; - }, - onSessionEnd: async (input, invocation) => { - const m = metrics.get(invocation.sessionId)!; - const durationSec = (input.timestamp - m.start) / 1000; - - console.log( - `Session ${invocation.sessionId.slice(0, 8)}: ` + - `${durationSec.toFixed(1)}s, ${m.prompts} prompts, ` + - `${m.toolCalls} tool calls, ended: ${input.reason}`, - ); - - metrics.delete(invocation.sessionId); - return null; - }, + hooks: { + onSessionStart: async (input, invocation) => { + metrics.set(invocation.sessionId, { + start: input.timestamp, + toolCalls: 0, + prompts: 0, + }); + return null; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + onUserPromptSubmitted: async (_input, invocation) => { + metrics.get(invocation.sessionId)!.prompts++; + return null; + }, + onPreToolUse: async (_input, invocation) => { + metrics.get(invocation.sessionId)!.toolCalls++; + return { permissionDecision: "allow" }; + }, + onSessionEnd: async (input, invocation) => { + const m = metrics.get(invocation.sessionId)!; + const durationSec = + (input.timestamp.getTime() - m.start.getTime()) / 
1000; + + console.log( + `Session ${invocation.sessionId.slice(0, 8)}: ` + + `${durationSec.toFixed(1)}s, ${m.prompts} prompts, ` + + `${m.toolCalls} tool calls, ended: ${input.reason}`, + ); + + metrics.delete(invocation.sessionId); + return null; + }, + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -957,7 +975,7 @@ async def on_pre_tool_use(input_data, invocation): async def on_session_end(input_data, invocation): m = session_metrics.pop(invocation["session_id"]) - duration = (input_data["timestamp"] - m["start"]) / 1000 + duration = (input_data["timestamp"] - m["start"]).total_seconds() sid = invocation["session_id"][:8] print( f"Session {sid}: {duration:.1f}s, {m['prompts']} prompts, " @@ -984,32 +1002,34 @@ Hooks compose naturally. A single `hooks` object can handle permissions **and** ```typescript const session = await client.createSession({ - hooks: { - onSessionStart: async (input) => { - console.log(`[audit] session started in ${input.cwd}`); - return { additionalContext: "Project uses TypeScript and Vitest." }; - }, - onPreToolUse: async (input) => { - console.log(`[audit] tool requested: ${input.toolName}`); - if (input.toolName === "shell") { - return { permissionDecision: "ask" }; - } - return { permissionDecision: "allow" }; - }, - onPostToolUse: async (input) => { - console.log(`[audit] tool completed: ${input.toolName}`); - return null; - }, - onErrorOccurred: async (input) => { - console.error(`[alert] ${input.errorContext}: ${input.error}`); - return null; - }, - onSessionEnd: async (input, invocation) => { - console.log(`[audit] session ${invocation.sessionId.slice(0, 8)} ended: ${input.reason}`); - return null; - }, + hooks: { + onSessionStart: async (input) => { + console.log(`[audit] session started in ${input.workingDirectory}`); + return { additionalContext: "Project uses TypeScript and Vitest." 
}; + }, + onPreToolUse: async (input) => { + console.log(`[audit] tool requested: ${input.toolName}`); + if (input.toolName === "shell") { + return { permissionDecision: "ask" }; + } + return { permissionDecision: "allow" }; + }, + onPostToolUse: async (input) => { + console.log(`[audit] tool completed: ${input.toolName}`); + return null; + }, + onErrorOccurred: async (input) => { + console.error(`[alert] ${input.errorContext}: ${input.error}`); + return null; + }, + onSessionEnd: async (input, invocation) => { + console.log( + `[audit] session ${invocation.sessionId.slice(0, 8)} ended: ${input.reason}`, + ); + return null; }, - onPermissionRequest: async () => ({ kind: "approve-once" }), + }, + onPermissionRequest: async () => ({ kind: "approve-once" }), }); ``` @@ -1031,16 +1051,16 @@ const session = await client.createSession({ For full type definitions, input/output field tables, and additional examples for every hook, see the API reference: -* [Hooks Overview](../hooks/hooks-overview.md) -* [Pre-Tool Use](../hooks/pre-tool-use.md) -* [Post-Tool Use](../hooks/post-tool-use.md) -* [User Prompt Submitted](../hooks/user-prompt-submitted.md) -* [Session Lifecycle](../hooks/session-lifecycle.md) -* [Error Handling](../hooks/error-handling.md) +- [Hooks Overview](../hooks/hooks-overview.md) +- [Pre-Tool Use](../hooks/pre-tool-use.md) +- [Post-Tool Use](../hooks/post-tool-use.md) +- [User Prompt Submitted](../hooks/user-prompt-submitted.md) +- [Session Lifecycle](../hooks/session-lifecycle.md) +- [Error Handling](../hooks/error-handling.md) ## See also -* [Getting Started](../getting-started.md) -* [Custom Agents & Sub-Agent Orchestration](./custom-agents.md) -* [Streaming Session Events](./streaming-events.md) -* [Debugging Guide](../troubleshooting/debugging.md) +- [Getting Started](../getting-started.md) +- [Custom Agents & Sub-Agent Orchestration](./custom-agents.md) +- [Streaming Session Events](./streaming-events.md) +- [Debugging 
Guide](../troubleshooting/debugging.md) diff --git a/docs/hooks/hooks-overview.md b/docs/hooks/hooks-overview.md index 460813c26..27628a5cc 100644 --- a/docs/hooks/hooks-overview.md +++ b/docs/hooks/hooks-overview.md @@ -13,7 +13,8 @@ Hooks allow you to intercept and customize the behavior of Copilot sessions at k | Hook | Trigger | Use Case | |------|---------|----------| | [`onPreToolUse`](./pre-tool-use.md) | Before a tool executes | Permission control, argument validation | -| [`onPostToolUse`](./post-tool-use.md) | After a tool executes | Result transformation, logging | +| [`onPostToolUse`](./post-tool-use.md) | After a tool executes (success only) | Result transformation, logging | +| [`onPostToolUseFailure`](./post-tool-use.md#failure-variant) | After a tool execution whose result was a failure | Inject retry guidance, log failures | | [`onUserPromptSubmitted`](./user-prompt-submitted.md) | When user sends a message | Prompt modification, filtering | | [`onSessionStart`](./session-lifecycle.md#session-start) | Session begins | Add context, configure session | | [`onSessionEnd`](./session-lifecycle.md#session-end) | Session ends | Cleanup, analytics | diff --git a/docs/hooks/post-tool-use.md b/docs/hooks/post-tool-use.md index f3c6f6799..96e9ee982 100644 --- a/docs/hooks/post-tool-use.md +++ b/docs/hooks/post-tool-use.md @@ -1,11 +1,14 @@ # Post-tool use hook -The `onPostToolUse` hook is called **after** a tool executes. Use it to: +The `onPostToolUse` hook is called **after** a tool executes **successfully**. Use it to: -* Transform or filter tool results -* Log tool execution for auditing -* Add context based on results -* Suppress results from the conversation +- Transform or filter tool results +- Log tool execution for auditing +- Add context based on results +- Suppress results from the conversation + +> **Failure variant** — `onPostToolUse` only fires for successful tool executions. 
To observe **failed** tool calls, register `onPostToolUseFailure` (`on_post_tool_use_failure` in Python, `OnPostToolUseFailure` in Go/.NET, `on_post_tool_use_failure` in Rust). The handler receives `{ sessionId, toolName, toolArgs, error, timestamp, workingDirectory }` — the `error` field is a string extracted from the tool's failure result — and may return `{ additionalContext: string }` to inject extra guidance for the model (e.g. retry hints). See the [hooks overview](./hooks-overview.md) for the full list. +> ## Hook signature @@ -13,18 +16,25 @@ The `onPostToolUse` hook is called **after** a tool executes. Use it to: Node.js / TypeScript + ```ts -import type { PostToolUseHookInput, HookInvocation, PostToolUseHookOutput } from "@github/copilot-sdk"; +import type { + PostToolUseHookInput, + HookInvocation, + PostToolUseHookOutput, +} from "@github/copilot-sdk"; type PostToolUseHandler = ( input: PostToolUseHookInput, - invocation: HookInvocation + invocation: HookInvocation, ) => Promise; ``` + + ```typescript type PostToolUseHandler = ( input: PostToolUseHookInput, - invocation: HookInvocation + invocation: HookInvocation, ) => Promise; ``` @@ -34,6 +44,7 @@ type PostToolUseHandler = ( Python + ```python from copilot.session import PostToolUseHookInput, PostToolUseHookOutput from typing import Callable, Awaitable @@ -43,7 +54,9 @@ PostToolUseHandler = Callable[ Awaitable[PostToolUseHookOutput | None] ] ``` + + ```python PostToolUseHandler = Callable[ [PostToolUseHookInput, dict[str, str]], @@ -57,6 +70,7 @@ PostToolUseHandler = Callable[ Go + ```go package main @@ -69,7 +83,9 @@ type PostToolUseHandler func( func main() {} ``` + + ```go type PostToolUseHandler func( input PostToolUseHookInput, @@ -83,6 +99,7 @@ type PostToolUseHandler func( .NET + ```csharp using GitHub.Copilot; @@ -90,7 +107,9 @@ public delegate Task PostToolUseHandler( PostToolUseHookInput input, HookInvocation invocation); ``` + + ```csharp public delegate Task PostToolUseHandler( 
PostToolUseHookInput input, @@ -112,23 +131,23 @@ PostToolUseHandler postToolUseHandler; ## Input -| Field | Type | Description | -|-------|------|-------------| -| `timestamp` | number | Unix timestamp when the hook was triggered | -| `cwd` | string | Current working directory | -| `toolName` | string | Name of the tool that was called | -| `toolArgs` | object | Arguments that were passed to the tool | -| `toolResult` | object | Result returned by the tool | +| Field | Type | Description | +| ------------------ | ------------------ | -------------------------------------- | +| `timestamp` | SDK timestamp type | When the hook was triggered | +| `workingDirectory` | string | Current working directory | +| `toolName` | string | Name of the tool that was called | +| `toolArgs` | object | Arguments that were passed to the tool | +| `toolResult` | object | Result returned by the tool | ## Output Return `null` or `undefined` to pass through the result unchanged. Otherwise, return an object with any of these fields: -| Field | Type | Description | -|-------|------|-------------| -| `modifiedResult` | object | Modified result to use instead of original | -| `additionalContext` | string | Extra context injected into the conversation | -| `suppressOutput` | boolean | If true, result won't appear in conversation | +| Field | Type | Description | +| ------------------- | ------- | -------------------------------------------- | +| `modifiedResult` | object | Modified result to use instead of original | +| `additionalContext` | string | Extra context injected into the conversation | +| `suppressOutput` | boolean | If true, result won't appear in conversation | ## Examples @@ -173,6 +192,7 @@ session = await client.create_session(on_permission_request=PermissionHandler.ap Go + ```go package main @@ -198,7 +218,9 @@ func main() { _ = session } ``` + + ```go session, _ := client.CreateSession(context.Background(), &copilot.SessionConfig{ Hooks: &copilot.SessionHooks{ @@ -218,6 
+240,7 @@ session, _ := client.CreateSession(context.Background(), &copilot.SessionConfig{ .NET + ```csharp using GitHub.Copilot; @@ -242,7 +265,9 @@ public static class PostToolUseExample } } ``` + + ```csharp var session = await client.CreateSessionAsync(new SessionConfig { @@ -303,7 +328,7 @@ const session = await client.createSession({ for (const pattern of SENSITIVE_PATTERNS) { redacted = redacted.replace(pattern, "[REDACTED]"); } - + if (redacted !== input.toolResult) { return { modifiedResult: redacted }; } @@ -323,7 +348,7 @@ const session = await client.createSession({ hooks: { onPostToolUse: async (input) => { const resultStr = JSON.stringify(input.toolResult); - + if (resultStr.length > MAX_RESULT_LENGTH) { return { modifiedResult: { @@ -349,17 +374,19 @@ const session = await client.createSession({ // If a file read returned an error, add helpful context if (input.toolName === "read_file" && input.toolResult?.error) { return { - additionalContext: "Tip: If the file doesn't exist, consider creating it or checking the path.", + additionalContext: + "Tip: If the file doesn't exist, consider creating it or checking the path.", }; } - + // If shell command failed, add debugging hint if (input.toolName === "shell" && input.toolResult?.exitCode !== 0) { return { - additionalContext: "The command failed. Check if required dependencies are installed.", + additionalContext: + "The command failed. 
Check if required dependencies are installed.", }; } - + return null; }, }, @@ -392,7 +419,7 @@ const session = await client.createSession({ ```typescript interface AuditEntry { - timestamp: number; + timestamp: Date; sessionId: string; toolName: string; args: unknown; @@ -413,10 +440,10 @@ const session = await client.createSession({ result: input.toolResult, success: !input.toolResult?.error, }); - + // Optionally persist to database/file await saveAuditLog(auditLog); - + return null; }, }, @@ -433,10 +460,10 @@ const session = await client.createSession({ onPostToolUse: async (input) => { if (NOISY_TOOLS.includes(input.toolName)) { // Summarize instead of showing full result - const items = Array.isArray(input.toolResult) - ? input.toolResult + const items = Array.isArray(input.toolResult) + ? input.toolResult : input.toolResult?.items || []; - + return { modifiedResult: { summary: `Found ${items.length} items`, @@ -464,6 +491,6 @@ const session = await client.createSession({ ## See also -* [Hooks Overview](./index.md) -* [Pre-Tool Use Hook](./pre-tool-use.md) -* [Error Handling Hook](./error-handling.md) +- [Hooks Overview](./index.md) +- [Pre-Tool Use Hook](./pre-tool-use.md) +- [Error Handling Hook](./error-handling.md) diff --git a/docs/troubleshooting/compatibility.md b/docs/troubleshooting/compatibility.md index 0e7eb4768..89476b26f 100644 --- a/docs/troubleshooting/compatibility.md +++ b/docs/troubleshooting/compatibility.md @@ -64,7 +64,8 @@ The Copilot SDK communicates with the CLI via JSON-RPC protocol. 
Features must b | Remote HTTP/SSE | `mcpServers` config | Connect to services | | **Hooks** | | | | Pre-tool use | `onPreToolUse` | Permission, modify args | -| Post-tool use | `onPostToolUse` | Modify results | +| Post-tool use (success) | `onPostToolUse` | Modify results | +| Post-tool use (failure) | `onPostToolUseFailure` | Observe failed tool calls, inject retry guidance | | User prompt | `onUserPromptSubmitted` | Modify prompts | | Session start/end | `onSessionStart`, `onSessionEnd` | Lifecycle with source/reason | | Error handling | `onErrorOccurred` | Custom handling | diff --git a/dotnet/README.md b/dotnet/README.md index a9527f447..719c554f4 100644 --- a/dotnet/README.md +++ b/dotnet/README.md @@ -859,6 +859,19 @@ var session = await client.CreateSessionAsync(new SessionConfig }; }, + // Called when a tool execution result was a failure. OnPostToolUse only + // fires on success, so register OnPostToolUseFailure to observe failed + // tool calls. The CLI extracts the failure message and passes it as + // input.Error. + OnPostToolUseFailure = async (input, invocation) => + { + Console.WriteLine($"Tool {input.ToolName} failed: {input.Error}"); + return new PostToolUseFailureHookOutput + { + AdditionalContext = $"Retry guidance for {input.ToolName}" + }; + }, + // Called when user submits a prompt OnUserPromptSubmitted = async (input, invocation) => { @@ -902,7 +915,8 @@ var session = await client.CreateSessionAsync(new SessionConfig **Available hooks:** - `OnPreToolUse` - Intercept tool calls before execution. Can allow/deny or modify arguments. -- `OnPostToolUse` - Process tool results after execution. Can modify results or add context. +- `OnPostToolUse` - Process tool results after successful execution. Can modify results or add context. +- `OnPostToolUseFailure` - Observe failed tool executions and inject extra context to guide the model's next step. - `OnUserPromptSubmitted` - Intercept user prompts. Can modify the prompt before processing. 
- `OnSessionStart` - Run logic when a session starts or resumes. - `OnSessionEnd` - Cleanup or logging when session ends. diff --git a/dotnet/src/Client.cs b/dotnet/src/Client.cs index 0e7730690..bfdef54b6 100644 --- a/dotnet/src/Client.cs +++ b/dotnet/src/Client.cs @@ -527,6 +527,7 @@ public async Task CreateSessionAsync(SessionConfig config, Cance config.Hooks.OnPreToolUse != null || config.Hooks.OnPreMcpToolCall != null || config.Hooks.OnPostToolUse != null || + config.Hooks.OnPostToolUseFailure != null || config.Hooks.OnUserPromptSubmitted != null || config.Hooks.OnSessionStart != null || config.Hooks.OnSessionEnd != null || @@ -694,6 +695,7 @@ public async Task ResumeSessionAsync(string sessionId, ResumeSes config.Hooks.OnPreToolUse != null || config.Hooks.OnPreMcpToolCall != null || config.Hooks.OnPostToolUse != null || + config.Hooks.OnPostToolUseFailure != null || config.Hooks.OnUserPromptSubmitted != null || config.Hooks.OnSessionStart != null || config.Hooks.OnSessionEnd != null || diff --git a/dotnet/src/Session.cs b/dotnet/src/Session.cs index bd2309187..c905d3178 100644 --- a/dotnet/src/Session.cs +++ b/dotnet/src/Session.cs @@ -1389,6 +1389,11 @@ internal void RegisterHooks(SessionHooks hooks) JsonSerializer.Deserialize(input.GetRawText(), SessionJsonContext.Default.PostToolUseHookInput)!, invocation) : null, + "postToolUseFailure" => hooks.OnPostToolUseFailure != null + ? await hooks.OnPostToolUseFailure( + JsonSerializer.Deserialize(input.GetRawText(), SessionJsonContext.Default.PostToolUseFailureHookInput)!, + invocation) + : null, "userPromptSubmitted" => hooks.OnUserPromptSubmitted != null ? 
await hooks.OnUserPromptSubmitted( JsonSerializer.Deserialize(input.GetRawText(), SessionJsonContext.Default.UserPromptSubmittedHookInput)!, @@ -1752,6 +1757,8 @@ internal void ThrowIfDisposed() [JsonSerializable(typeof(ExitPlanModeResult))] [JsonSerializable(typeof(GetMessagesRequest))] [JsonSerializable(typeof(GetMessagesResponse))] + [JsonSerializable(typeof(PostToolUseFailureHookInput))] + [JsonSerializable(typeof(PostToolUseFailureHookOutput))] [JsonSerializable(typeof(PostToolUseHookInput))] [JsonSerializable(typeof(PostToolUseHookOutput))] [JsonSerializable(typeof(PreMcpToolCallHookInput))] diff --git a/dotnet/src/Types.cs b/dotnet/src/Types.cs index a02a5db3a..914179988 100644 --- a/dotnet/src/Types.cs +++ b/dotnet/src/Types.cs @@ -1311,6 +1311,68 @@ public sealed class PostToolUseHookOutput public bool? SuppressOutput { get; set; } } +/// +/// Input for a post-tool-use-failure hook. +/// +/// Fires after a tool execution whose result was "failure". The CLI extracts +/// the failure message from the tool result and passes it as the +/// field (rather than passing the full result object). +/// +public sealed class PostToolUseFailureHookInput +{ + /// + /// The runtime session ID of the session that triggered the hook. + /// + [JsonPropertyName("sessionId")] + public string SessionId { get; set; } = string.Empty; + + /// + /// Unix timestamp in milliseconds when the tool execution completed. + /// + [JsonPropertyName("timestamp")] + [JsonConverter(typeof(UnixMillisecondsDateTimeOffsetConverter))] + public DateTimeOffset Timestamp { get; set; } + + /// + /// Current working directory of the session. + /// + [JsonPropertyName("cwd")] + public string WorkingDirectory { get; set; } = string.Empty; + + /// + /// Name of the tool that failed. + /// + [JsonPropertyName("toolName")] + public string ToolName { get; set; } = string.Empty; + + /// + /// Arguments that were passed to the tool. + /// + [JsonPropertyName("toolArgs")] + public JsonElement? 
ToolArgs { get; set; } + + /// + /// Failure message extracted from the tool's result. + /// + [JsonPropertyName("error")] + public string Error { get; set; } = string.Empty; +} + +/// +/// Output for a post-tool-use-failure hook. +/// +/// Only is consumed by the host CLI — it is +/// appended as hidden guidance to the model alongside the failed tool result. +/// +public sealed class PostToolUseFailureHookOutput +{ + /// + /// Additional context to inject into the conversation for the language model. + /// + [JsonPropertyName("additionalContext")] + public string? AdditionalContext { get; set; } +} + /// /// Input for a user-prompt-submitted hook. /// @@ -1604,6 +1666,13 @@ public sealed class SessionHooks /// public Func>? OnPostToolUse { get; set; } + /// + /// Handler called after a tool execution whose result was a failure. + /// only fires for successful tool executions; + /// register this handler in addition to observe failed tool calls. + /// + public Func>? OnPostToolUseFailure { get; set; } + /// /// Handler called when the user submits a prompt. /// diff --git a/dotnet/test/E2E/HookLifecycleAndOutputE2ETests.cs b/dotnet/test/E2E/HookLifecycleAndOutputE2ETests.cs index d4304191c..19704f8fd 100644 --- a/dotnet/test/E2E/HookLifecycleAndOutputE2ETests.cs +++ b/dotnet/test/E2E/HookLifecycleAndOutputE2ETests.cs @@ -3,7 +3,6 @@ *--------------------------------------------------------------------------------------------*/ using Microsoft.Extensions.AI; -using System.Text.Json; using Xunit; using Xunit.Abstractions; @@ -11,11 +10,11 @@ namespace GitHub.Copilot.Test.E2E; /// /// E2E coverage for every handler exposed on : -/// OnPreToolUse, OnPostToolUse, OnUserPromptSubmitted, OnSessionStart, OnSessionEnd, -/// OnErrorOccurred. Output-shape behavior (modifiedPrompt / additionalContext / -/// errorHandling / modifiedArgs / modifiedResult / sessionSummary) is asserted alongside -/// hook invocation. 
If a new handler is added to SessionHooks, add a corresponding -/// test here. +/// OnPreToolUse, OnPostToolUse, OnPostToolUseFailure, OnUserPromptSubmitted, +/// OnSessionStart, OnSessionEnd, OnErrorOccurred. Output-shape behavior +/// (modifiedPrompt / additionalContext / errorHandling / modifiedArgs / +/// modifiedResult / sessionSummary) is asserted alongside hook invocation. If a +/// new handler is added to SessionHooks, add a corresponding test here. /// public class HookLifecycleAndOutputE2ETests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "hooks_extended", output) @@ -343,4 +342,55 @@ public async Task Should_Allow_PostToolUse_To_Return_ModifiedResult() Assert.Contains(inputs, input => input.ToolName == "report_intent"); Assert.Equal("Done.", response?.Data.Content); } + + [Fact] + public async Task Should_Invoke_PostToolUseFailure_Hook_For_Failed_Tool_Result() + { + var failureInputs = new List(); + var postToolUseInputs = new List(); + CopilotSession? session = null; + session = await CreateSessionAsync(new SessionConfig + { + OnPermissionRequest = PermissionHandler.ApproveAll, + AvailableTools = ["report_intent"], + Hooks = new SessionHooks + { + OnPostToolUse = (input, invocation) => + { + postToolUseInputs.Add(input); + return Task.FromResult(null); + }, + OnPostToolUseFailure = (input, invocation) => + { + failureInputs.Add(input); + Assert.Equal(session!.SessionId, invocation.SessionId); + return Task.FromResult(new PostToolUseFailureHookOutput + { + AdditionalContext = "HOOK_FAILURE_GUIDANCE_APPLIED", + }); + }, + }, + }); + + var response = await session.SendAndWaitAsync(new MessageOptions + { + Prompt = "Call the view tool with path 'missing.txt'. 
If it fails, use the hook guidance to answer.", + }); + + Assert.Empty(postToolUseInputs); + var input = Assert.Single(failureInputs); + Assert.Equal("view", input.ToolName); + Assert.Contains("does not exist", input.Error); + Assert.NotNull(input.ToolArgs); + Assert.True(input.Timestamp > DateTimeOffset.UnixEpoch); + Assert.False(string.IsNullOrEmpty(input.WorkingDirectory)); + Assert.Contains("HOOK_FAILURE_GUIDANCE_APPLIED", response?.Data.Content ?? string.Empty); + + var exchanges = await WaitForExchangesAsync(2); + var toolMessage = exchanges[^1].Request.Messages.Single(message => message.Role == "tool"); + Assert.Contains("does not exist", toolMessage.StringContent); + Assert.Contains( + exchanges[^1].Request.Messages, + message => (message.StringContent ?? string.Empty).Contains("HOOK_FAILURE_GUIDANCE_APPLIED", StringComparison.Ordinal)); + } } diff --git a/go/README.md b/go/README.md index da77033f8..568d75f9d 100644 --- a/go/README.md +++ b/go/README.md @@ -142,7 +142,7 @@ Event types: `SessionLifecycleCreated`, `SessionLifecycleDeleted`, `SessionLifec - `UriConnection{URL, ConnectionToken}` — connect to an already-running runtime (no process spawned) When `Path` is empty for stdio/tcp, the SDK uses the bundled CLI (or `COPILOT_CLI_PATH` env var). -- `Cwd` (string): Working directory for the runtime process +- `WorkingDirectory` (string): Working directory for the runtime process - `BaseDirectory` (string): Base directory for Copilot data (session state, config, etc.). Sets `COPILOT_HOME` on the spawned runtime. When empty, the runtime defaults to `~/.copilot`. Ignored with `UriConnection`. This does **not** affect where the Go SDK extracts the embedded CLI binary; use `embeddedcli.Config.Dir` for the extraction/cache location. - `LogLevel` (string): Log level. When empty (default), the runtime uses its own default level (the SDK does not pass `--log-level`). 
- `Env` ([]string): Environment variables for the runtime process (default: inherits from current process) @@ -695,6 +695,17 @@ session, err := client.CreateSession(context.Background(), &copilot.SessionConfi }, nil }, + // Called when a tool execution result was a failure. OnPostToolUse only + // fires on success, so register OnPostToolUseFailure to observe failed + // tool calls. The CLI extracts the failure message and passes it as + // input.Error. + OnPostToolUseFailure: func(input copilot.PostToolUseFailureHookInput, invocation copilot.HookInvocation) (*copilot.PostToolUseFailureHookOutput, error) { + fmt.Printf("Tool %s failed: %s\n", input.ToolName, input.Error) + return &copilot.PostToolUseFailureHookOutput{ + AdditionalContext: fmt.Sprintf("Retry guidance for %s", input.ToolName), + }, nil + }, + // Called when user submits a prompt OnUserPromptSubmitted: func(input copilot.UserPromptSubmittedHookInput, invocation copilot.HookInvocation) (*copilot.UserPromptSubmittedHookOutput, error) { fmt.Printf("User prompt: %s\n", input.Prompt) @@ -731,7 +742,8 @@ session, err := client.CreateSession(context.Background(), &copilot.SessionConfi **Available hooks:** - `OnPreToolUse` - Intercept tool calls before execution. Can allow/deny or modify arguments. -- `OnPostToolUse` - Process tool results after execution. Can modify results or add context. +- `OnPostToolUse` - Process tool results after successful execution. Can modify results or add context. +- `OnPostToolUseFailure` - Observe failed tool executions and inject extra context to guide the model's next step. - `OnUserPromptSubmitted` - Intercept user prompts. Can modify the prompt before processing. - `OnSessionStart` - Run logic when a session starts or resumes. - `OnSessionEnd` - Cleanup or logging when session ends. 
diff --git a/go/client.go b/go/client.go index 6e7557b0b..d9ba9e1ac 100644 --- a/go/client.go +++ b/go/client.go @@ -662,6 +662,7 @@ func (c *Client) CreateSession(ctx context.Context, config *SessionConfig) (*Ses if config.Hooks != nil && (config.Hooks.OnPreToolUse != nil || config.Hooks.OnPreMcpToolCall != nil || config.Hooks.OnPostToolUse != nil || + config.Hooks.OnPostToolUseFailure != nil || config.Hooks.OnUserPromptSubmitted != nil || config.Hooks.OnSessionStart != nil || config.Hooks.OnSessionEnd != nil || @@ -820,6 +821,7 @@ func (c *Client) ResumeSessionWithOptions(ctx context.Context, sessionID string, if config.Hooks != nil && (config.Hooks.OnPreToolUse != nil || config.Hooks.OnPreMcpToolCall != nil || config.Hooks.OnPostToolUse != nil || + config.Hooks.OnPostToolUseFailure != nil || config.Hooks.OnUserPromptSubmitted != nil || config.Hooks.OnSessionStart != nil || config.Hooks.OnSessionEnd != nil || diff --git a/go/internal/e2e/hooks_extended_e2e_test.go b/go/internal/e2e/hooks_extended_e2e_test.go index 677de58b8..5c049da8a 100644 --- a/go/internal/e2e/hooks_extended_e2e_test.go +++ b/go/internal/e2e/hooks_extended_e2e_test.go @@ -1,6 +1,7 @@ package e2e import ( + "fmt" "strings" "sync" "testing" @@ -12,11 +13,12 @@ import ( // Mirrors dotnet/test/HookLifecycleAndOutputTests.cs (snapshot category "hooks_extended"). // -// Covers each handler exposed on copilot.SessionHooks: OnPreToolUse, OnPostToolUse, -// OnUserPromptSubmitted, OnSessionStart, OnSessionEnd, OnErrorOccurred. Output-shape -// behavior (modifiedPrompt / additionalContext / errorHandling / modifiedArgs / -// modifiedResult / sessionSummary) is asserted alongside hook invocation. If a new -// handler is added to SessionHooks, add a corresponding test here. +// Covers each handler exposed on copilot.SessionHooks: OnPreToolUse, +// OnPostToolUse, OnPostToolUseFailure, OnUserPromptSubmitted, OnSessionStart, +// OnSessionEnd, OnErrorOccurred. 
Output-shape behavior (modifiedPrompt / +// additionalContext / errorHandling / modifiedArgs / modifiedResult / +// sessionSummary) is asserted alongside hook invocation. If a new handler is +// added to SessionHooks, add a corresponding test here. func TestHooksExtendedE2E(t *testing.T) { ctx := testharness.NewTestContext(t) client := ctx.NewClient() @@ -340,4 +342,76 @@ func TestHooksExtendedE2E(t *testing.T) { t.Errorf("Expected response content to be 'Done.', got %v", response.Data) } }) + + t.Run("should invoke postToolUseFailure hook for failed tool result", func(t *testing.T) { + ctx.ConfigureForTest(t) + + var ( + mu sync.Mutex + failureInputs []copilot.PostToolUseFailureHookInput + postToolUseInputs []copilot.PostToolUseHookInput + ) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + AvailableTools: []string{"report_intent"}, + Hooks: &copilot.SessionHooks{ + OnPostToolUse: func(input copilot.PostToolUseHookInput, invocation copilot.HookInvocation) (*copilot.PostToolUseHookOutput, error) { + mu.Lock() + postToolUseInputs = append(postToolUseInputs, input) + mu.Unlock() + return nil, nil + }, + OnPostToolUseFailure: func(input copilot.PostToolUseFailureHookInput, invocation copilot.HookInvocation) (*copilot.PostToolUseFailureHookOutput, error) { + mu.Lock() + failureInputs = append(failureInputs, input) + mu.Unlock() + if invocation.SessionID == "" { + t.Error("Expected non-empty session ID in invocation") + } + return &copilot.PostToolUseFailureHookOutput{ + AdditionalContext: "HOOK_FAILURE_GUIDANCE_APPLIED", + }, nil + }, + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + response, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "Call the view tool with path 'missing.txt'. 
If it fails, use the hook guidance to answer.", + }) + if err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + mu.Lock() + defer mu.Unlock() + if len(postToolUseInputs) != 0 { + t.Fatalf("Expected postToolUse not to fire for failed result, got %+v", postToolUseInputs) + } + if len(failureInputs) != 1 { + t.Fatalf("Expected one postToolUseFailure input, got %+v", failureInputs) + } + input := failureInputs[0] + if input.ToolName != "view" { + t.Errorf("Expected tool name view, got %q", input.ToolName) + } + if !strings.Contains(input.Error, "does not exist") { + t.Errorf("Expected missing-tool error, got %q", input.Error) + } + if !strings.Contains(fmt.Sprint(input.ToolArgs), "missing.txt") { + t.Errorf("Expected tool args to contain missing.txt, got %+v", input.ToolArgs) + } + if input.WorkingDirectory == "" { + t.Error("Expected working directory to be populated") + } + if input.Timestamp.IsZero() { + t.Error("Expected timestamp to be populated") + } + if assistantMessage, ok := response.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(assistantMessage.Content, "HOOK_FAILURE_GUIDANCE_APPLIED") { + t.Errorf("Expected response to contain hook guidance, got %v", response.Data) + } + }) } diff --git a/go/session.go b/go/session.go index eca928c19..9beab8709 100644 --- a/go/session.go +++ b/go/session.go @@ -521,6 +521,16 @@ func (s *Session) handleHooksInvoke(hookType string, rawInput json.RawMessage) ( } return hooks.OnPostToolUse(input, invocation) + case "postToolUseFailure": + if hooks.OnPostToolUseFailure == nil { + return nil, nil + } + var input PostToolUseFailureHookInput + if err := json.Unmarshal(rawInput, &input); err != nil { + return nil, fmt.Errorf("invalid hook input: %w", err) + } + return hooks.OnPostToolUseFailure(input, invocation) + case "userPromptSubmitted": if hooks.OnUserPromptSubmitted == nil { return nil, nil diff --git a/go/session_test.go b/go/session_test.go index 16ac64273..b107fb62c 100644 --- 
a/go/session_test.go +++ b/go/session_test.go @@ -553,6 +553,72 @@ func TestSession_ElicitationHandler(t *testing.T) { }) } +func TestSession_PostToolUseFailureHook(t *testing.T) { + t.Run("dispatches with parsed input and returns additional context", func(t *testing.T) { + session, cleanup := newTestSession() + defer cleanup() + + var captured PostToolUseFailureHookInput + session.registerHooks(&SessionHooks{ + OnPostToolUseFailure: func(input PostToolUseFailureHookInput, _ HookInvocation) (*PostToolUseFailureHookOutput, error) { + captured = input + return &PostToolUseFailureHookOutput{ + AdditionalContext: "extra-context: " + input.Error, + }, nil + }, + }) + + raw := json.RawMessage(`{ + "sessionId": "sess-1", + "timestamp": 1700000000, + "cwd": "/work", + "toolName": "tool-x", + "toolArgs": {"foo": "bar"}, + "error": "boom" + }`) + output, err := session.handleHooksInvoke("postToolUseFailure", raw) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if captured.SessionID != "sess-1" { + t.Errorf("expected sessionId 'sess-1', got %q", captured.SessionID) + } + if captured.ToolName != "tool-x" { + t.Errorf("expected toolName 'tool-x', got %q", captured.ToolName) + } + if captured.Error != "boom" { + t.Errorf("expected error 'boom', got %q", captured.Error) + } + if !captured.Timestamp.Equal(time.UnixMilli(1700000000)) { + t.Errorf("expected timestamp %v, got %v", time.UnixMilli(1700000000), captured.Timestamp) + } + if captured.WorkingDirectory != "/work" { + t.Errorf("expected WorkingDirectory '/work', got %q", captured.WorkingDirectory) + } + out, ok := output.(*PostToolUseFailureHookOutput) + if !ok { + t.Fatalf("expected *PostToolUseFailureHookOutput, got %T", output) + } + if out.AdditionalContext != "extra-context: boom" { + t.Errorf("unexpected AdditionalContext: %q", out.AdditionalContext) + } + }) + + t.Run("no handler registered returns nil without error", func(t *testing.T) { + session, cleanup := newTestSession() + defer cleanup() + 
session.registerHooks(&SessionHooks{}) + + output, err := session.handleHooksInvoke("postToolUseFailure", json.RawMessage(`{"sessionId":"sess-1","timestamp":0,"cwd":"","toolName":"t","toolArgs":null,"error":"e"}`)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if output != nil { + t.Errorf("expected nil output, got %v", output) + } + }) +} + func TestSession_HookForwardCompatibility(t *testing.T) { t.Run("unknown hook type returns nil without error when known hooks are registered", func(t *testing.T) { session, cleanup := newTestSession() @@ -567,9 +633,9 @@ func TestSession_HookForwardCompatibility(t *testing.T) { }, }) - // "postToolUseFailure" is an example of a hook type introduced by a newer - // CLI version that the SDK does not yet know about. - output, err := session.handleHooksInvoke("postToolUseFailure", json.RawMessage(`{}`)) + // "futureUnknownHookType" stands in for a hook type introduced by a + // newer CLI version that the SDK does not yet know about. + output, err := session.handleHooksInvoke("futureUnknownHookType", json.RawMessage(`{}`)) if err != nil { t.Errorf("Expected no error for unknown hook type, got: %v", err) } diff --git a/go/types.go b/go/types.go index fe7f9d93c..46ac3c1b2 100644 --- a/go/types.go +++ b/go/types.go @@ -446,6 +446,55 @@ type PostToolUseHookOutput struct { // PostToolUseHandler handles post-tool-use hook invocations type PostToolUseHandler func(input PostToolUseHookInput, invocation HookInvocation) (*PostToolUseHookOutput, error) +// PostToolUseFailureHookInput is the input for a post-tool-use-failure hook. +// +// Fires after a tool execution whose result was "failure". The CLI extracts +// the failure message from the tool result and passes it as the Error field +// (rather than passing the full result object). 
+type PostToolUseFailureHookInput struct { + SessionID string `json:"sessionId"` + Timestamp time.Time `json:"-"` + WorkingDirectory string `json:"cwd"` + ToolName string `json:"toolName"` + ToolArgs any `json:"toolArgs"` + // Error is the failure message from the tool's result. + Error string `json:"error"` +} + +// MarshalJSON implements json.Marshaler, emitting Timestamp as Unix milliseconds. +func (h PostToolUseFailureHookInput) MarshalJSON() ([]byte, error) { + type alias PostToolUseFailureHookInput + return json.Marshal(&struct { + Timestamp int64 `json:"timestamp"` + alias + }{Timestamp: h.Timestamp.UnixMilli(), alias: alias(h)}) +} + +// UnmarshalJSON implements json.Unmarshaler, parsing Timestamp from Unix milliseconds. +func (h *PostToolUseFailureHookInput) UnmarshalJSON(data []byte) error { + type alias PostToolUseFailureHookInput + aux := &struct { + Timestamp int64 `json:"timestamp"` + *alias + }{alias: (*alias)(h)} + if err := json.Unmarshal(data, aux); err != nil { + return err + } + h.Timestamp = time.UnixMilli(aux.Timestamp) + return nil +} + +// PostToolUseFailureHookOutput is the output for a post-tool-use-failure hook. +// +// Only AdditionalContext is consumed by the host CLI — it is appended as +// hidden guidance to the model alongside the failed tool result. +type PostToolUseFailureHookOutput struct { + AdditionalContext string `json:"additionalContext,omitempty"` +} + +// PostToolUseFailureHandler handles post-tool-use-failure hook invocations. 
+type PostToolUseFailureHandler func(input PostToolUseFailureHookInput, invocation HookInvocation) (*PostToolUseFailureHookOutput, error) + // UserPromptSubmittedHookInput is the input for a user-prompt-submitted hook type UserPromptSubmittedHookInput struct { SessionID string `json:"sessionId"` @@ -667,6 +716,7 @@ type HookInvocation struct { type SessionHooks struct { OnPreToolUse PreToolUseHandler OnPostToolUse PostToolUseHandler + OnPostToolUseFailure PostToolUseFailureHandler OnUserPromptSubmitted UserPromptSubmittedHandler OnSessionStart SessionStartHandler OnSessionEnd SessionEndHandler diff --git a/nodejs/README.md b/nodejs/README.md index 1cb6e7836..aadf7c677 100644 --- a/nodejs/README.md +++ b/nodejs/README.md @@ -957,7 +957,7 @@ const session = await client.createSession({ }; }, - // Called after each tool execution + // Called after each successful tool execution onPostToolUse: async (input, invocation) => { console.log(`Tool ${input.toolName} completed`); // Optionally modify the result or add context @@ -966,6 +966,16 @@ const session = await client.createSession({ }; }, + // Called after a tool execution whose result was "failure". + // onPostToolUse does NOT fire for failed tool calls — register this + // hook to observe them. Input includes `error` (the failure message + // extracted from the tool's result), not the full result object. + onPostToolUseFailure: async (input, invocation) => { + console.log(`Tool ${input.toolName} failed: ${input.error}`); + // Optionally append hidden guidance to the model. + return { additionalContext: "Suggest checking inputs and retrying." }; + }, + // Called when user submits a prompt onUserPromptSubmitted: async (input, invocation) => { console.log(`User prompt: ${input.prompt}`); @@ -1001,7 +1011,8 @@ const session = await client.createSession({ **Available hooks:** - `onPreToolUse` - Intercept tool calls before execution. Can allow/deny or modify arguments. 
-- `onPostToolUse` - Process tool results after execution. Can modify results or add context. +- `onPostToolUse` - Process tool results after **successful** execution. Can modify results or add context. +- `onPostToolUseFailure` - Observe and append hidden guidance to the model after tool executions whose result was `"failure"`. Register this in addition to `onPostToolUse` to see failed tool calls. - `onUserPromptSubmitted` - Intercept user prompts. Can modify the prompt before processing. - `onSessionStart` - Run logic when a session starts or resumes. - `onSessionEnd` - Cleanup or logging when session ends. diff --git a/nodejs/docs/agent-author.md b/nodejs/docs/agent-author.md index 787bb6a32..907181442 100644 --- a/nodejs/docs/agent-author.md +++ b/nodejs/docs/agent-author.md @@ -118,19 +118,20 @@ hooks: { onUserPromptSubmitted: async (input, invocation) => { ... }, onPreToolUse: async (input, invocation) => { ... }, onPostToolUse: async (input, invocation) => { ... }, + onPostToolUseFailure: async (input, invocation) => { ... }, onSessionStart: async (input, invocation) => { ... }, onSessionEnd: async (input, invocation) => { ... }, onErrorOccurred: async (input, invocation) => { ... }, } ``` -All hook inputs include `timestamp` (unix ms) and `cwd` (working directory). +All hook inputs include `timestamp` (`Date`) and `workingDirectory`. All handlers receive `invocation: { sessionId: string }` as the second argument. All handlers may return `void`/`undefined` (no-op) or an output object. ### onUserPromptSubmitted -**Input:** `{ prompt: string, timestamp, cwd }` +**Input:** `{ prompt: string, timestamp, workingDirectory }` **Output (all fields optional):** | Field | Type | Effect | @@ -140,7 +141,7 @@ All handlers may return `void`/`undefined` (no-op) or an output object. 
### onPreToolUse -**Input:** `{ toolName: string, toolArgs: unknown, timestamp, cwd }` +**Input:** `{ toolName: string, toolArgs: unknown, timestamp, workingDirectory }` **Output (all fields optional):** | Field | Type | Effect | @@ -152,7 +153,10 @@ All handlers may return `void`/`undefined` (no-op) or an output object. ### onPostToolUse -**Input:** `{ toolName: string, toolArgs: unknown, toolResult: ToolResultObject, timestamp, cwd }` +**Input:** `{ toolName: string, toolArgs: unknown, toolResult: ToolResultObject, timestamp, workingDirectory }` + +Fires only when the tool returned a successful result. To observe non-success +outcomes, register `onPostToolUseFailure` as well. **Output (all fields optional):** | Field | Type | Effect | @@ -160,9 +164,29 @@ All handlers may return `void`/`undefined` (no-op) or an output object. | `modifiedResult` | `ToolResultObject` | Replaces the tool result | | `additionalContext` | `string` | Injected into the conversation | +### onPostToolUseFailure + +**Input:** `{ toolName: string, toolArgs: unknown, error: string, timestamp, workingDirectory }` + +Fires after a tool execution whose result was `"failure"`. `onPostToolUse` +does **not** fire for these outcomes, so register this handler to observe or +react to them — useful for telemetry, replay buffers, fault-injection tests, +or pairing pre/post tool tracking that would otherwise leak when the tool +fails. Note the input shape differs from `onPostToolUse`: only `error` (the +stringified failure message) is provided, not the full `toolResult`. + +**Output (all fields optional):** +| Field | Type | Effect | +|-------|------|--------| +| `additionalContext` | `string` | Appended as hidden guidance the model sees alongside the failed tool result | + +Note: only `"failure"` results trigger this hook. Other non-success +`resultType` values (`"rejected"`, `"denied"`, `"timeout"`) do not currently +fire it. 
+ ### onSessionStart -**Input:** `{ source: "startup" \| "resume" \| "new", initialPrompt?: string, timestamp, cwd }` +**Input:** `{ source: "startup" \| "resume" \| "new", initialPrompt?: string, timestamp, workingDirectory }` **Output (all fields optional):** | Field | Type | Effect | @@ -171,7 +195,7 @@ All handlers may return `void`/`undefined` (no-op) or an output object. ### onSessionEnd -**Input:** `{ reason: "complete" \| "error" \| "abort" \| "timeout" \| "user_exit", finalMessage?: string, error?: string, timestamp, cwd }` +**Input:** `{ reason: "complete" \| "error" \| "abort" \| "timeout" \| "user_exit", finalMessage?: string, error?: string, timestamp, workingDirectory }` **Output (all fields optional):** | Field | Type | Effect | @@ -181,7 +205,7 @@ All handlers may return `void`/`undefined` (no-op) or an output object. ### onErrorOccurred -**Input:** `{ error: string, errorContext: "model_call" \| "tool_execution" \| "system" \| "user_input", recoverable: boolean, timestamp, cwd }` +**Input:** `{ error: string, errorContext: "model_call" \| "tool_execution" \| "system" \| "user_input", recoverable: boolean, timestamp, workingDirectory }` **Output (all fields optional):** | Field | Type | Effect | diff --git a/nodejs/docs/examples.md b/nodejs/docs/examples.md index c4b8acb1c..a2b106a48 100644 --- a/nodejs/docs/examples.md +++ b/nodejs/docs/examples.md @@ -152,16 +152,17 @@ Hooks intercept and modify behavior at key lifecycle points. 
Register them in th ### Available Hooks -| Hook | Fires When | Can Modify | -| ----------------------- | ------------------------- | ------------------------------------------- | -| `onUserPromptSubmitted` | User sends a message | The prompt text, add context | -| `onPreToolUse` | Before a tool executes | Tool args, permission decision, add context | -| `onPostToolUse` | After a tool executes | Tool result, add context | -| `onSessionStart` | Session starts or resumes | Add context, modify config | -| `onSessionEnd` | Session ends | Cleanup actions, summary | -| `onErrorOccurred` | An error occurs | Error handling strategy (retry/skip/abort) | - -All hook inputs include `timestamp` (unix ms) and `cwd` (working directory). +| Hook | Fires When | Can Modify | +| ----------------------- | ---------------------------------------- | ------------------------------------------- | +| `onUserPromptSubmitted` | User sends a message | The prompt text, add context | +| `onPreToolUse` | Before a tool executes | Tool args, permission decision, add context | +| `onPostToolUse` | After a tool executes successfully | Tool result, add context | +| `onPostToolUseFailure` | After a tool execution returns a failure | Add hidden guidance to the model | +| `onSessionStart` | Session starts or resumes | Add context, modify config | +| `onSessionEnd` | Session ends | Cleanup actions, summary | +| `onErrorOccurred` | An error occurs | Error handling strategy (retry/skip/abort) | + +All hook inputs include `timestamp` (`Date`) and `workingDirectory`. ### Modifying the user's message @@ -267,12 +268,18 @@ hooks: { } ``` -### Augmenting tool results with extra context +### Reacting when a tool fails + +`onPostToolUse` only fires for successful tool executions. To observe or react +to failures, register `onPostToolUseFailure`. 
The input includes +`input.error` (the stringified failure message); only `additionalContext` on +the return value is consumed by the runtime, and it is appended as hidden +guidance alongside the failed tool result. ```js hooks: { - onPostToolUse: async (input) => { - if (input.toolName === "bash" && input.toolResult?.resultType === "failure") { + onPostToolUseFailure: async (input) => { + if (input.toolName === "bash") { return { additionalContext: "The command failed. Try a different approach.", }; diff --git a/nodejs/src/session.ts b/nodejs/src/session.ts index 74823602e..e908c766e 100644 --- a/nodejs/src/session.ts +++ b/nodejs/src/session.ts @@ -53,8 +53,8 @@ import type { /** * Convert a raw hook input received over the wire into its public-facing shape. - * Currently this only deserializes the numeric Unix-ms `timestamp` field on - * BaseHookInput into a Date. Anything else passes through unchanged. + * This deserializes the numeric Unix-ms `timestamp` field on BaseHookInput + * into a Date and maps the wire `cwd` field to `workingDirectory`. 
*/ function deserializeHookInput(raw: unknown): unknown { if ( @@ -1007,6 +1007,7 @@ export class CopilotSession { preToolUse: this.hooks.onPreToolUse as GenericHandler | undefined, preMcpToolCall: this.hooks.onPreMcpToolCall as GenericHandler | undefined, postToolUse: this.hooks.onPostToolUse as GenericHandler | undefined, + postToolUseFailure: this.hooks.onPostToolUseFailure as GenericHandler | undefined, userPromptSubmitted: this.hooks.onUserPromptSubmitted as GenericHandler | undefined, sessionStart: this.hooks.onSessionStart as GenericHandler | undefined, sessionEnd: this.hooks.onSessionEnd as GenericHandler | undefined, diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts index 623a4cabd..938a7f2fc 100644 --- a/nodejs/src/types.ts +++ b/nodejs/src/types.ts @@ -1109,6 +1109,51 @@ export type PostToolUseHandler = ( invocation: { sessionId: string } ) => Promise<PostToolUseHookOutput | void> | PostToolUseHookOutput | void; +/** + * Input for post-tool-use-failure hook. + * + * Dispatched after a tool execution whose `resultType` is `"failure"`. + * The input differs from {@link PostToolUseHookInput}: the host CLI does not + * forward the full `ToolResultObject` to failure hooks — only `error`, the + * stringified failure message extracted from the tool's result, is provided. + */ +export interface PostToolUseFailureHookInput extends BaseHookInput { + toolName: string; + toolArgs: unknown; + /** + * Failure message from the tool's result (the `error` field of the + * underlying `ToolResultObject`, falling back to its text/log fields). + */ + error: string; +} + +/** + * Output for post-tool-use-failure hook. + * + * Only `additionalContext` is consumed by the host CLI — it is appended as + * hidden guidance to the model alongside the failed tool result. Other fields + * such as `modifiedResult` or `suppressOutput` are not honored for failure + * hooks (see {@link PostToolUseHookOutput} for the success-only hook).
+ */ +export interface PostToolUseFailureHookOutput { + additionalContext?: string; +} + +/** + * Handler for post-tool-use-failure hook. + * + * Fires after a tool execution whose result was `"failure"`. `onPostToolUse` + * only fires for successful results, so register this handler to observe or + * react to failed tool outcomes. + * + * Note: `"rejected"`, `"denied"`, and `"timeout"` results do not currently + * trigger this hook either — only `"failure"` does. + */ +export type PostToolUseFailureHandler = ( + input: PostToolUseFailureHookInput, + invocation: { sessionId: string } +) => Promise<PostToolUseFailureHookOutput | void> | PostToolUseFailureHookOutput | void; + /** * Input for user-prompt-submitted hook */ @@ -1225,10 +1270,22 @@ export interface SessionHooks { onPreMcpToolCall?: PreMcpToolCallHandler; /** - * Called after a tool is executed + * Called after a tool is executed with a successful result. + * + * For failed tool executions, register {@link onPostToolUseFailure} instead; + * this handler does not fire for non-success results. */ onPostToolUse?: PostToolUseHandler; + /** + * Called after a tool execution whose result was `"failure"`. + * + * Register this handler alongside {@link onPostToolUse} to observe failed + * tool calls — `onPostToolUse` only fires for successful results, so + * without this hook failed tool calls are invisible to extensions. + */ + onPostToolUseFailure?: PostToolUseFailureHandler; + /** * Called when the user submits a prompt */ diff --git a/nodejs/test/client.test.ts b/nodejs/test/client.test.ts index ff46c75b3..8ac8530af 100644 --- a/nodejs/test/client.test.ts +++ b/nodejs/test/client.test.ts @@ -1728,4 +1728,177 @@ describe("CopilotClient", () => { expect((client as any).options.sessionIdleTimeoutSeconds).toBe(600); }); }); + + describe("hooks dispatcher", () => { + // Direct unit tests for CopilotSession._handleHooksInvoke. The hook + // dispatch logic maps the CLI-emitted hook type (string) to the + // corresponding SessionHooks handler.
These tests guard against + // regressions like the one fixed for postToolUseFailure (issue #1220). + + it("dispatches postToolUseFailure to onPostToolUseFailure handler", async () => { + const client = new CopilotClient(); + await client.start(); + onTestFinished(() => client.forceStop()); + + const received: { input: any; invocation: any }[] = []; + const session = await client.createSession({ + onPermissionRequest: approveAll, + hooks: { + onPostToolUseFailure: async (input, invocation) => { + received.push({ input, invocation }); + return { additionalContext: "failure observed" }; + }, + }, + }); + + const failureInput = { + toolName: "failing-tool", + toolArgs: { foo: "bar" }, + error: "exit 1", + timestamp: 1234, + cwd: "/tmp", + }; + const expectedInput = { + toolName: "failing-tool", + toolArgs: { foo: "bar" }, + error: "exit 1", + timestamp: new Date(1234), + workingDirectory: "/tmp", + }; + const result = await (session as any)._handleHooksInvoke( + "postToolUseFailure", + failureInput + ); + + expect(received).toHaveLength(1); + expect(received[0].input).toEqual(expectedInput); + expect(received[0].invocation.sessionId).toBe(session.sessionId); + expect(result).toEqual({ additionalContext: "failure observed" }); + }); + + it("does not fall back to onPostToolUse for postToolUseFailure events", async () => { + const client = new CopilotClient(); + await client.start(); + onTestFinished(() => client.forceStop()); + + const postUseCalls: string[] = []; + const session = await client.createSession({ + onPermissionRequest: approveAll, + hooks: { + // Only onPostToolUse registered; postToolUseFailure events + // must not be routed here. 
+ onPostToolUse: async (input) => { + postUseCalls.push(input.toolName); + }, + }, + }); + + const result = await (session as any)._handleHooksInvoke("postToolUseFailure", { + toolName: "failing-tool", + toolArgs: {}, + error: "boom", + timestamp: 0, + cwd: "/tmp", + }); + + expect(postUseCalls).toHaveLength(0); + expect(result).toBeUndefined(); + }); + + it("dispatches postToolUse and postToolUseFailure to their respective handlers", async () => { + const client = new CopilotClient(); + await client.start(); + onTestFinished(() => client.forceStop()); + + const postCalls: string[] = []; + const failureCalls: string[] = []; + const session = await client.createSession({ + onPermissionRequest: approveAll, + hooks: { + onPostToolUse: async (input) => { + postCalls.push(input.toolName); + }, + onPostToolUseFailure: async (input) => { + failureCalls.push(input.toolName); + }, + }, + }); + + await (session as any)._handleHooksInvoke("postToolUse", { + toolName: "success-tool", + toolArgs: {}, + toolResult: { + textResultForLlm: "ok", + resultType: "success" as const, + }, + timestamp: 0, + cwd: "/tmp", + }); + await (session as any)._handleHooksInvoke("postToolUseFailure", { + toolName: "fail-tool", + toolArgs: {}, + error: "bad", + timestamp: 0, + cwd: "/tmp", + }); + + expect(postCalls).toEqual(["success-tool"]); + expect(failureCalls).toEqual(["fail-tool"]); + }); + + it("routes hooks.invoke JSON-RPC requests to the SessionHooks handler", async () => { + // Validates the full JSON-RPC entry point used by the CLI: + // CopilotClient.handleHooksInvoke({sessionId, hookType, input}) + // → CopilotSession._handleHooksInvoke(hookType, input) + // → SessionHooks.onPostToolUseFailure(normalizedInput, {sessionId}) + // + // This guards the wire-format contract that the bundled Copilot + // CLI relies on: the hookType string "postToolUseFailure" and the + // input shape `{toolName, toolArgs, error, timestamp, cwd}`. 
+ // The SDK maps that to public `{..., timestamp: Date, workingDirectory}`. + const client = new CopilotClient(); + await client.start(); + onTestFinished(() => client.forceStop()); + + const received: { input: any; invocation: any }[] = []; + const session = await client.createSession({ + onPermissionRequest: approveAll, + hooks: { + onPostToolUseFailure: async (input, invocation) => { + received.push({ input, invocation }); + return { additionalContext: "context from failure hook" }; + }, + }, + }); + + const failureInput = { + toolName: "shell", + toolArgs: { command: "false" }, + error: "exit 1", + timestamp: 1700000000000, + cwd: "/tmp", + }; + + const response = await (client as any).handleHooksInvoke({ + sessionId: session.sessionId, + hookType: "postToolUseFailure", + input: failureInput, + }); + + expect(received).toHaveLength(1); + expect(received[0].input).toEqual({ + toolName: "shell", + toolArgs: { command: "false" }, + error: "exit 1", + timestamp: new Date(1700000000000), + workingDirectory: "/tmp", + }); + expect(received[0].invocation.sessionId).toBe(session.sessionId); + // The CLI only consumes output.additionalContext; the SDK returns + // it wrapped in `{ output }` per the JSON-RPC contract. 
+ expect(response).toEqual({ + output: { additionalContext: "context from failure hook" }, + }); + }); + }); }); diff --git a/nodejs/test/e2e/hooks_extended.e2e.test.ts b/nodejs/test/e2e/hooks_extended.e2e.test.ts index b68a642c8..e0e82f813 100644 --- a/nodejs/test/e2e/hooks_extended.e2e.test.ts +++ b/nodejs/test/e2e/hooks_extended.e2e.test.ts @@ -7,6 +7,7 @@ import { z } from "zod"; import { approveAll, defineTool } from "../../src/index.js"; import type { ErrorOccurredHookInput, + PostToolUseFailureHookInput, PostToolUseHookInput, PreToolUseHookInput, SessionEndHookInput, @@ -299,4 +300,38 @@ describe("Extended session hooks", async () => { await session.disconnect(); }); + + it("should invoke postToolUseFailure hook for failed tool result", async () => { + const failureInputs: PostToolUseFailureHookInput[] = []; + const postToolUseInputs: PostToolUseHookInput[] = []; + const session = await client.createSession({ + onPermissionRequest: approveAll, + availableTools: ["report_intent"], + hooks: { + onPostToolUse: async (input) => { + postToolUseInputs.push(input); + }, + onPostToolUseFailure: async (input, invocation) => { + failureInputs.push(input); + expect(invocation.sessionId).toBe(session.sessionId); + return { additionalContext: "HOOK_FAILURE_GUIDANCE_APPLIED" }; + }, + }, + }); + + const response = await session.sendAndWait({ + prompt: "Call the view tool with path 'missing.txt'. If it fails, use the hook guidance to answer.", + }); + + expect(postToolUseInputs).toHaveLength(0); + expect(failureInputs).toHaveLength(1); + expect(failureInputs[0].toolName).toBe("view"); + expect(failureInputs[0].error).toContain("does not exist"); + expect((failureInputs[0].toolArgs as { path?: string }).path).toContain("missing.txt"); + expect(failureInputs[0].timestamp).toBeInstanceOf(Date); + expect(failureInputs[0].workingDirectory).toBeTruthy(); + expect(response?.data.content ?? 
"").toContain("HOOK_FAILURE_GUIDANCE_APPLIED"); + + await session.disconnect(); + }); }); diff --git a/python/README.md b/python/README.md index 3a504f966..6445ed1e9 100644 --- a/python/README.md +++ b/python/README.md @@ -696,6 +696,15 @@ async def on_post_tool_use(input, invocation): "additionalContext": "Post-execution notes", } +async def on_post_tool_use_failure(input, invocation): + # Fires when a tool's result was a failure. `on_post_tool_use` only fires + # on success, so register this handler to observe failed tool calls. The + # CLI extracts the failure message and passes it as the `error` field. + print(f"Tool {input['toolName']} failed: {input['error']}") + return { + "additionalContext": f"Retry guidance for {input['toolName']}", + } + async def on_user_prompt_submitted(input, invocation): print(f"User prompt: {input['prompt']}") return { @@ -723,6 +732,7 @@ async with await client.create_session( hooks={ "on_pre_tool_use": on_pre_tool_use, "on_post_tool_use": on_post_tool_use, + "on_post_tool_use_failure": on_post_tool_use_failure, "on_user_prompt_submitted": on_user_prompt_submitted, "on_session_start": on_session_start, "on_session_end": on_session_end, @@ -735,7 +745,8 @@ async with await client.create_session( **Available hooks:** - `on_pre_tool_use` - Intercept tool calls before execution. Can allow/deny or modify arguments. -- `on_post_tool_use` - Process tool results after execution. Can modify results or add context. +- `on_post_tool_use` - Process tool results after successful execution. Can modify results or add context. +- `on_post_tool_use_failure` - Observe failed tool executions and inject extra context to guide the model's next step. - `on_user_prompt_submitted` - Intercept user prompts. Can modify the prompt before processing. - `on_session_start` - Run logic when a session starts or resumes. - `on_session_end` - Cleanup or logging when session ends. 
diff --git a/python/copilot/__init__.py b/python/copilot/__init__.py index 874267c9f..53c8c5e34 100644 --- a/python/copilot/__init__.py +++ b/python/copilot/__init__.py @@ -91,6 +91,9 @@ PermissionNoResult, PermissionRequestResult, PostToolUseHandler, + PostToolUseFailureHandler, + PostToolUseFailureHookInput, + PostToolUseFailureHookOutput, PostToolUseHookInput, PostToolUseHookOutput, PreMcpToolCallHandler, @@ -199,6 +202,9 @@ "PermissionRequestResult", "PingResponse", "PostToolUseHandler", + "PostToolUseFailureHandler", + "PostToolUseFailureHookInput", + "PostToolUseFailureHookOutput", "PostToolUseHookInput", "PostToolUseHookOutput", "PreMcpToolCallHandler", diff --git a/python/copilot/session.py b/python/copilot/session.py index 90134a151..996a03535 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -714,6 +714,38 @@ class PostToolUseHookOutput(TypedDict, total=False): ] +class PostToolUseFailureHookInput(TypedDict): + """Input for post-tool-use-failure hook. + + Fires after a tool execution whose result was ``"failure"``. The CLI + extracts the failure message from the tool result and passes it as the + ``error`` field (rather than passing the full result object). + """ + + sessionId: str + timestamp: datetime + workingDirectory: str + toolName: str + toolArgs: Any + error: str + + +class PostToolUseFailureHookOutput(TypedDict, total=False): + """Output for post-tool-use-failure hook. + + Only ``additionalContext`` is consumed by the host CLI — it is appended + as hidden guidance to the model alongside the failed tool result. 
+ """ + + additionalContext: str + + +PostToolUseFailureHandler = Callable[ + [PostToolUseFailureHookInput, dict[str, str]], + PostToolUseFailureHookOutput | None | Awaitable[PostToolUseFailureHookOutput | None], +] + + class UserPromptSubmittedHookInput(TypedDict): """Input for user-prompt-submitted hook""" @@ -817,6 +849,7 @@ class SessionHooks(TypedDict, total=False): on_pre_tool_use: PreToolUseHandler on_pre_mcp_tool_call: PreMcpToolCallHandler on_post_tool_use: PostToolUseHandler + on_post_tool_use_failure: PostToolUseFailureHandler on_user_prompt_submitted: UserPromptSubmittedHandler on_session_start: SessionStartHandler on_session_end: SessionEndHandler @@ -2071,6 +2104,7 @@ async def _handle_hooks_invoke(self, hook_type: str, input_data: Any) -> Any: "preToolUse": hooks.get("on_pre_tool_use"), "preMcpToolCall": hooks.get("on_pre_mcp_tool_call"), "postToolUse": hooks.get("on_post_tool_use"), + "postToolUseFailure": hooks.get("on_post_tool_use_failure"), "userPromptSubmitted": hooks.get("on_user_prompt_submitted"), "sessionStart": hooks.get("on_session_start"), "sessionEnd": hooks.get("on_session_end"), diff --git a/python/e2e/test_hooks_extended_e2e.py b/python/e2e/test_hooks_extended_e2e.py index dbaef75b0..4b43f0993 100644 --- a/python/e2e/test_hooks_extended_e2e.py +++ b/python/e2e/test_hooks_extended_e2e.py @@ -2,10 +2,11 @@ Extended hook lifecycle tests that mirror dotnet/test/HookLifecycleAndOutputTests.cs. E2E coverage for every handler exposed on ``SessionHooks``: -``on_pre_tool_use``, ``on_post_tool_use``, ``on_user_prompt_submitted``, -``on_session_start``, ``on_session_end``, ``on_error_occurred``. Output-shape -behavior (modifiedPrompt / additionalContext / errorHandling / modifiedArgs / -modifiedResult / sessionSummary) is asserted alongside hook invocation. +``on_pre_tool_use``, ``on_post_tool_use``, ``on_post_tool_use_failure``, +``on_user_prompt_submitted``, ``on_session_start``, ``on_session_end``, +``on_error_occurred``. 
Output-shape behavior (modifiedPrompt / +additionalContext / errorHandling / modifiedArgs / modifiedResult / +sessionSummary) is asserted alongside hook invocation. """ from __future__ import annotations @@ -184,3 +185,42 @@ async def on_post_tool_use(input_data, invocation): assert (response.data.content or "").strip().rstrip(".") in {"Done", "done"} finally: await session.disconnect() + + async def test_should_invoke_posttoolusefailure_hook_for_failed_tool_result( + self, ctx: E2ETestContext + ): + failure_inputs: list[dict] = [] + post_tool_use_inputs: list[dict] = [] + + async def on_post_tool_use(input_data, invocation): + post_tool_use_inputs.append(input_data) + return None + + async def on_post_tool_use_failure(input_data, invocation): + failure_inputs.append(input_data) + assert invocation["session_id"] == session.session_id + return {"additionalContext": "HOOK_FAILURE_GUIDANCE_APPLIED"} + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + available_tools=["report_intent"], + hooks={ + "on_post_tool_use": on_post_tool_use, + "on_post_tool_use_failure": on_post_tool_use_failure, + }, + ) + try: + response = await session.send_and_wait( + "Call the view tool with path 'missing.txt'. If it fails, use the hook guidance to answer." + ) + assert not post_tool_use_inputs + assert len(failure_inputs) == 1 + failure_input = failure_inputs[0] + assert failure_input["toolName"] == "view" + assert "does not exist" in failure_input["error"] + assert "missing.txt" in failure_input["toolArgs"]["path"] + assert failure_input["timestamp"].timestamp() > 0 + assert failure_input["workingDirectory"] + assert "HOOK_FAILURE_GUIDANCE_APPLIED" in (response.data.content or "") + finally: + await session.disconnect() diff --git a/python/test_client.py b/python/test_client.py index 14320b3a2..04ab02b65 100644 --- a/python/test_client.py +++ b/python/test_client.py @@ -4,6 +4,7 @@ This file is for unit tests. 
Where relevant, prefer to add e2e tests in e2e/*.py instead. """ +from datetime import UTC, datetime from unittest.mock import AsyncMock, patch import pytest @@ -1020,3 +1021,71 @@ def test_model_field_is_omitted_when_absent(self): } wire = client._convert_custom_agent_to_wire_format(agent) assert "model" not in wire + + +class TestPostToolUseFailureHookDispatch: + """Unit tests for the postToolUseFailure handler dispatch.""" + + @pytest.mark.asyncio + async def test_dispatches_to_on_post_tool_use_failure(self): + from copilot.session import CopilotSession, SessionHooks + + captured: dict = {} + + async def on_failure(input_data, invocation): + captured["input"] = input_data + captured["invocation"] = invocation + return {"additionalContext": f"saw {input_data['toolName']}: {input_data['error']}"} + + session = CopilotSession.__new__(CopilotSession) + CopilotSession.__init__(session, "sess-123", client=None) + session._hooks = SessionHooks(on_post_tool_use_failure=on_failure) # type: ignore[typeddict-item] + + result = await session._handle_hooks_invoke( + "postToolUseFailure", + { + "sessionId": "sess-x", + "timestamp": 1700000000, + "cwd": "/work", + "toolName": "tool-x", + "toolArgs": {"foo": "bar"}, + "error": "boom", + }, + ) + assert result == {"additionalContext": "saw tool-x: boom"} + assert captured["input"]["toolName"] == "tool-x" + assert captured["input"]["workingDirectory"] == "/work" + assert captured["input"]["timestamp"] == datetime.fromtimestamp(1700000000 / 1000, tz=UTC) + assert captured["invocation"] == {"session_id": "sess-123"} + + @pytest.mark.asyncio + async def test_returns_none_when_no_handler_registered(self): + from copilot.session import CopilotSession, SessionHooks + + session = CopilotSession.__new__(CopilotSession) + CopilotSession.__init__(session, "sess-x", client=None) + # Hooks registered, but no postToolUseFailure handler -> dispatch returns None. 
+ session._hooks = SessionHooks(on_post_tool_use=lambda i, v: None) # type: ignore[typeddict-item] + + result = await session._handle_hooks_invoke( + "postToolUseFailure", + {"sessionId": "sess-x", "timestamp": 0, "cwd": "/", "toolName": "t", "toolArgs": None, "error": "e"}, + ) + assert result is None + + @pytest.mark.asyncio + async def test_sync_handler_works(self): + from copilot.session import CopilotSession, SessionHooks + + def on_failure(input_data, invocation): + return {"additionalContext": "sync-ok"} + + session = CopilotSession.__new__(CopilotSession) + CopilotSession.__init__(session, "sess-y", client=None) + session._hooks = SessionHooks(on_post_tool_use_failure=on_failure) # type: ignore[typeddict-item] + + result = await session._handle_hooks_invoke( + "postToolUseFailure", + {"sessionId": "sess-x", "timestamp": 0, "cwd": "/", "toolName": "t", "toolArgs": None, "error": "e"}, + ) + assert result == {"additionalContext": "sync-ok"} diff --git a/rust/README.md b/rust/README.md index f4d80fefd..00e26dbaa 100644 --- a/rust/README.md +++ b/rust/README.md @@ -290,7 +290,7 @@ let session = client .await?; ``` -**Hook events:** `PreToolUse`, `PostToolUse`, `UserPromptSubmitted`, `SessionStart`, `SessionEnd`, `ErrorOccurred`. Each carries typed input/output structs. Return `HookOutput::None` for events you don't handle. +**Hook events:** `PreToolUse`, `PostToolUse`, `PostToolUseFailure`, `UserPromptSubmitted`, `SessionStart`, `SessionEnd`, `ErrorOccurred`. Each carries typed input/output structs. `PostToolUse` only fires on success; override `on_post_tool_use_failure` to observe failed tool calls. Return `HookOutput::None` for events you don't handle. 
### System Message Transforms diff --git a/rust/src/hooks.rs b/rust/src/hooks.rs index fedc6d98b..ec8cdfa3a 100644 --- a/rust/src/hooks.rs +++ b/rust/src/hooks.rs @@ -132,6 +132,43 @@ pub struct PostToolUseOutput { pub suppress_output: Option<bool>, } +/// Input for the `postToolUseFailure` hook — received after a tool execution +/// whose result was `"failure"`. +/// +/// `postToolUse` only fires for successful tool executions. Register a handler +/// for `postToolUseFailure` to observe failed tool calls. The CLI extracts the +/// failure message from the tool result and passes it as the `error` field +/// (rather than passing the full result object). +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PostToolUseFailureInput { + /// The runtime session ID of the session that triggered the hook. + pub session_id: String, + /// Unix timestamp (ms). + pub timestamp: i64, + /// Working directory. + #[serde(rename = "cwd")] + pub working_directory: PathBuf, + /// Name of the tool that failed. + pub tool_name: String, + /// Arguments that were passed to the tool. + pub tool_args: Value, + /// Failure message extracted from the tool's result. + pub error: String, +} + +/// Output for the `postToolUseFailure` hook. +/// +/// Only `additional_context` is consumed by the host CLI — it is appended as +/// hidden guidance to the model alongside the failed tool result. +#[derive(Debug, Clone, Default, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct PostToolUseFailureOutput { + /// Extra context appended to the failed tool result for the agent. + #[serde(skip_serializing_if = "Option::is_none")] + pub additional_context: Option<String>, +} + /// Input for the `userPromptSubmitted` hook — received when the user sends a message. #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "camelCase")] @@ -294,6 +331,15 @@ pub enum HookEvent { /// Session context. ctx: HookContext, }, + /// Fired after a tool execution whose result was `"failure"`.
+ /// [`HookEvent::PostToolUse`] only fires on success, so observe this + /// variant to react to failed tool calls. + PostToolUseFailure { + /// Typed input data. + input: PostToolUseFailureInput, + /// Session context. + ctx: HookContext, + }, /// Fired when the user sends a message. UserPromptSubmitted { /// Typed input data. @@ -339,6 +385,8 @@ pub enum HookOutput { PreMcpToolCall(PreMcpToolCallOutput), /// Response for a post-tool-use hook. PostToolUse(PostToolUseOutput), + /// Response for a post-tool-use-failure hook. + PostToolUseFailure(PostToolUseFailureOutput), /// Response for a user-prompt-submitted hook. UserPromptSubmitted(UserPromptSubmittedOutput), /// Response for a session-start hook. @@ -356,6 +404,7 @@ impl HookOutput { Self::PreToolUse(_) => "PreToolUse", Self::PreMcpToolCall(_) => "PreMcpToolCall", Self::PostToolUse(_) => "PostToolUse", + Self::PostToolUseFailure(_) => "PostToolUseFailure", Self::UserPromptSubmitted(_) => "UserPromptSubmitted", Self::SessionStart(_) => "SessionStart", Self::SessionEnd(_) => "SessionEnd", @@ -403,6 +452,11 @@ pub trait SessionHooks: Send + Sync + 'static { .await .map(HookOutput::PostToolUse) .unwrap_or(HookOutput::None), + HookEvent::PostToolUseFailure { input, ctx } => self + .on_post_tool_use_failure(input, ctx) + .await + .map(HookOutput::PostToolUseFailure) + .unwrap_or(HookOutput::None), HookEvent::UserPromptSubmitted { input, ctx } => self .on_user_prompt_submitted(input, ctx) .await @@ -457,6 +511,18 @@ pub trait SessionHooks: Send + Sync + 'static { None } + /// Called after a tool execution whose result was `"failure"`. The + /// success-only [`on_post_tool_use`](Self::on_post_tool_use) hook does + /// not fire for these outcomes, so override this method to observe or + /// inject extra context after failed tool calls. + async fn on_post_tool_use_failure( + &self, + _input: PostToolUseFailureInput, + _ctx: HookContext, + ) -> Option<PostToolUseFailureOutput> { + None + } + /// Called when the user submits a prompt.
Return `Some(output)` to /// rewrite the prompt or inject extra context; `None` (default) passes /// through unchanged. @@ -527,6 +593,10 @@ pub(crate) async fn dispatch_hook( let input: PostToolUseInput = serde_json::from_value(raw_input)?; HookEvent::PostToolUse { input, ctx } } + "postToolUseFailure" => { + let input: PostToolUseFailureInput = serde_json::from_value(raw_input)?; + HookEvent::PostToolUseFailure { input, ctx } + } "userPromptSubmitted" => { let input: UserPromptSubmittedInput = serde_json::from_value(raw_input)?; HookEvent::UserPromptSubmitted { input, ctx } @@ -571,6 +641,7 @@ pub(crate) async fn dispatch_hook( ("preToolUse", HookOutput::PreToolUse(o)) => Some(serde_json::to_value(o)?), ("preMcpToolCall", HookOutput::PreMcpToolCall(o)) => Some(serde_json::to_value(o)?), ("postToolUse", HookOutput::PostToolUse(o)) => Some(serde_json::to_value(o)?), + ("postToolUseFailure", HookOutput::PostToolUseFailure(o)) => Some(serde_json::to_value(o)?), ("userPromptSubmitted", HookOutput::UserPromptSubmitted(o)) => { Some(serde_json::to_value(o)?) } @@ -750,6 +821,101 @@ mod tests { assert_eq!(result["output"], serde_json::json!({})); } + #[tokio::test] + async fn dispatch_post_tool_use_failure_default() { + // No handler override — should return an empty output object. 
+ let hooks = TestHooks; + let input = serde_json::json!({ + "sessionId": "sess-1", + "timestamp": 1234567890, + "cwd": "/tmp", + "toolName": "some_tool", + "toolArgs": {"key": "value"}, + "error": "boom" + }); + let result = dispatch_hook( + &hooks, + &SessionId::new("sess-1"), + "postToolUseFailure", + input, + ) + .await + .unwrap(); + assert_eq!(result["output"], serde_json::json!({})); + } + + #[tokio::test] + async fn dispatch_post_tool_use_failure_returns_additional_context() { + struct FailureHooks; + #[async_trait] + impl SessionHooks for FailureHooks { + async fn on_post_tool_use_failure( + &self, + input: PostToolUseFailureInput, + _ctx: HookContext, + ) -> Option<PostToolUseFailureOutput> { + assert_eq!(input.session_id, "sess-1"); + assert_eq!(input.tool_name, "some_tool"); + assert_eq!(input.error, "boom"); + assert_eq!(input.working_directory, PathBuf::from("/tmp")); + Some(PostToolUseFailureOutput { + additional_context: Some(format!( + "tool {} failed: {}", + input.tool_name, input.error + )), + }) + } + } + + let input = serde_json::json!({ + "sessionId": "sess-1", + "timestamp": 1234567890, + "cwd": "/tmp", + "toolName": "some_tool", + "toolArgs": {}, + "error": "boom" + }); + let result = dispatch_hook( + &FailureHooks, + &SessionId::new("sess-1"), + "postToolUseFailure", + input, + ) + .await + .unwrap(); + assert_eq!( + result["output"]["additionalContext"], + "tool some_tool failed: boom" + ); + } + + #[tokio::test] + async fn dispatch_post_tool_use_failure_invalid_input_errors() { + // Missing required `error` field — dispatcher should surface the + // deserialization error rather than dispatching with empty input.
+ let hooks = TestHooks; + let input = serde_json::json!({ + "sessionId": "sess-1", + "timestamp": 1234567890, + "cwd": "/tmp", + "toolName": "some_tool", + "toolArgs": {} + }); + let err = dispatch_hook( + &hooks, + &SessionId::new("sess-1"), + "postToolUseFailure", + input, + ) + .await + .unwrap_err(); + let msg = err.to_string().to_ascii_lowercase(); + assert!( + msg.contains("error") || msg.contains("missing field"), + "unexpected error: {msg}" + ); + } + #[tokio::test] async fn dispatch_session_start() { struct StartHooks; diff --git a/rust/tests/e2e/hooks_extended.rs b/rust/tests/e2e/hooks_extended.rs index 00acc77cf..39e8d3d39 100644 --- a/rust/tests/e2e/hooks_extended.rs +++ b/rust/tests/e2e/hooks_extended.rs @@ -3,9 +3,10 @@ use std::sync::Arc; use async_trait::async_trait; use github_copilot_sdk::handler::ApproveAllHandler; use github_copilot_sdk::hooks::{ - ErrorOccurredInput, ErrorOccurredOutput, HookContext, PostToolUseInput, PostToolUseOutput, - PreToolUseInput, PreToolUseOutput, SessionEndInput, SessionEndOutput, SessionHooks, - SessionStartInput, SessionStartOutput, UserPromptSubmittedInput, UserPromptSubmittedOutput, + ErrorOccurredInput, ErrorOccurredOutput, HookContext, PostToolUseFailureInput, + PostToolUseFailureOutput, PostToolUseInput, PostToolUseOutput, PreToolUseInput, + PreToolUseOutput, SessionEndInput, SessionEndOutput, SessionHooks, SessionStartInput, + SessionStartOutput, UserPromptSubmittedInput, UserPromptSubmittedOutput, }; use github_copilot_sdk::tool::ToolHandler; use github_copilot_sdk::{Error, SessionConfig, Tool, ToolInvocation, ToolResult}; @@ -360,6 +361,57 @@ async fn should_allow_posttooluse_to_return_modifiedresult() { .await; } +#[tokio::test] +async fn should_invoke_posttoolusefailure_hook_for_failed_tool_result() { + with_e2e_context( + "hooks_extended", + "should_invoke_posttoolusefailure_hook_for_failed_tool_result", + |ctx| { + Box::pin(async move { + ctx.set_default_copilot_user(); + let (failure_tx, mut 
failure_rx) = mpsc::unbounded_channel(); + let (post_tx, mut post_rx) = mpsc::unbounded_channel(); + let client = ctx.start_client().await; + let session = client + .create_session( + ctx.approve_all_session_config() + .with_available_tools(["report_intent"]) + .with_hooks(Arc::new(RecordingHooks::post_tool_failure( + failure_tx, post_tx, + ))), + ) + .await + .expect("create session"); + + let answer = session + .send_and_wait( + "Call the view tool with path 'missing.txt'. If it fails, use the hook guidance to answer.", + ) + .await + .expect("send") + .expect("assistant message"); + + let input = recv_with_timeout(&mut failure_rx, "postToolUseFailure hook").await; + assert!(post_rx.try_recv().is_err()); + assert_eq!(input.tool_name, "view"); + assert!(input.error.contains("does not exist")); + assert!(input.tool_args["path"] + .as_str() + .is_some_and(|path| path.contains("missing.txt"))); + assert!(input.timestamp > 0); + assert!(!input.working_directory.as_os_str().is_empty()); + assert!( + assistant_message_content(&answer).contains("HOOK_FAILURE_GUIDANCE_APPLIED") + ); + + session.disconnect().await.expect("disconnect session"); + client.stop().await.expect("stop client"); + }) + }, + ) + .await; +} + #[derive(Default)] struct RecordingHooks { session_start: Option>, @@ -372,6 +424,7 @@ struct RecordingHooks { error_output: Option, pre_tool: Option>, post_tool: Option>, + post_tool_failure: Option<mpsc::UnboundedSender<PostToolUseFailureInput>>, } impl RecordingHooks { @@ -432,6 +485,17 @@ impl RecordingHooks { ..Self::default() } } + + fn post_tool_failure( + failure_tx: mpsc::UnboundedSender<PostToolUseFailureInput>, + post_tx: mpsc::UnboundedSender, + ) -> Self { + Self { + post_tool: Some(post_tx), + post_tool_failure: Some(failure_tx), + ..Self::default() + } + } } #[async_trait] @@ -533,6 +597,20 @@ impl SessionHooks for RecordingHooks { } output } + + async fn on_post_tool_use_failure( + &self, + input: PostToolUseFailureInput, + ctx: HookContext, + ) -> Option<PostToolUseFailureOutput> { + assert!(!ctx.session_id.as_str().is_empty()); + if let
Some(tx) = &self.post_tool_failure { + let _ = tx.send(input); + } + Some(PostToolUseFailureOutput { + additional_context: Some("HOOK_FAILURE_GUIDANCE_APPLIED".to_string()), + }) + } } struct EchoValueTool; diff --git a/test/snapshots/hooks_extended/should_invoke_posttoolusefailure_hook_for_failed_tool_result.yaml b/test/snapshots/hooks_extended/should_invoke_posttoolusefailure_hook_for_failed_tool_result.yaml new file mode 100644 index 000000000..ba83e0f7a --- /dev/null +++ b/test/snapshots/hooks_extended/should_invoke_posttoolusefailure_hook_for_failed_tool_result.yaml @@ -0,0 +1,24 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Call the view tool with path 'missing.txt'. If it fails, use the hook guidance to answer. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: view + arguments: '{"path":"${workdir}/missing.txt"}' + - role: tool + tool_call_id: toolcall_0 + content: Tool 'view' does not exist. Available tools that can be called are report_intent. + - role: user + content: |- + Tool "view" failed. Additional guidance from postToolUseFailure hook: + HOOK_FAILURE_GUIDANCE_APPLIED + - role: assistant + content: HOOK_FAILURE_GUIDANCE_APPLIED From 8ee27d8e4885f188009d7731b5a3f401c137ab99 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 25 May 2026 16:21:14 -0400 Subject: [PATCH 2/6] Format Python hook tests Apply ruff formatting to the post-tool-use failure hook unit tests. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/test_client.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/python/test_client.py b/python/test_client.py index 04ab02b65..757322fa2 100644 --- a/python/test_client.py +++ b/python/test_client.py @@ -1069,7 +1069,14 @@ async def test_returns_none_when_no_handler_registered(self): result = await session._handle_hooks_invoke( "postToolUseFailure", - {"sessionId": "sess-x", "timestamp": 0, "cwd": "/", "toolName": "t", "toolArgs": None, "error": "e"}, + { + "sessionId": "sess-x", + "timestamp": 0, + "cwd": "/", + "toolName": "t", + "toolArgs": None, + "error": "e", + }, ) assert result is None @@ -1086,6 +1093,13 @@ def on_failure(input_data, invocation): result = await session._handle_hooks_invoke( "postToolUseFailure", - {"sessionId": "sess-x", "timestamp": 0, "cwd": "/", "toolName": "t", "toolArgs": None, "error": "e"}, + { + "sessionId": "sess-x", + "timestamp": 0, + "cwd": "/", + "toolName": "t", + "toolArgs": None, + "error": "e", + }, ) assert result == {"additionalContext": "sync-ok"} From 1ea19e0b39bbb567c233f3bebbd4074ed5f2be46 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 25 May 2026 16:30:36 -0400 Subject: [PATCH 3/6] Address PR review feedback Fix Rust E2E formatting and update docs-maintenance Python validation paths. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/agents/docs-maintenance.agent.md | 6 +++--- rust/tests/e2e/hooks_extended.rs | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/agents/docs-maintenance.agent.md b/.github/agents/docs-maintenance.agent.md index 2a6685de2..b1bf95ae8 100644 --- a/.github/agents/docs-maintenance.agent.md +++ b/.github/agents/docs-maintenance.agent.md @@ -353,9 +353,9 @@ cat nodejs/src/types.ts | grep -A 10 "export interface ExportSessionOptions" grep -E "^\s+async def [a-z]" python/copilot/client.py python/copilot/session.py # Key types -cat python/copilot/types.py | grep -A 20 "class CopilotClientOptions" -cat python/copilot/types.py | grep -A 30 "class SessionConfig" -cat python/copilot/types.py | grep -A 15 "class SessionHooks" +cat python/copilot/client.py | grep -A 20 "class _CopilotClientOptions" +cat python/copilot/client.py | grep -A 80 "async def create_session" +cat python/copilot/session.py | grep -A 15 "class SessionHooks" ``` **Must match (snake_case):** diff --git a/rust/tests/e2e/hooks_extended.rs b/rust/tests/e2e/hooks_extended.rs index 39e8d3d39..de71cf055 100644 --- a/rust/tests/e2e/hooks_extended.rs +++ b/rust/tests/e2e/hooks_extended.rs @@ -391,13 +391,15 @@ async fn should_invoke_posttoolusefailure_hook_for_failed_tool_result() { .expect("send") .expect("assistant message"); - let input = recv_with_timeout(&mut failure_rx, "postToolUseFailure hook").await; + let input = recv_with_timeout(&mut failure_rx, "postToolUseFailure hook").await; assert!(post_rx.try_recv().is_err()); assert_eq!(input.tool_name, "view"); assert!(input.error.contains("does not exist")); - assert!(input.tool_args["path"] - .as_str() - .is_some_and(|path| path.contains("missing.txt"))); + assert!( + input.tool_args["path"] + .as_str() + .is_some_and(|path| path.contains("missing.txt")) + ); assert!(input.timestamp > 0); assert!(!input.working_directory.as_os_str().is_empty()); 
assert!( From 2f29f000aa528465f3f6c840ac92a8fd73de8f3b Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 25 May 2026 16:33:13 -0400 Subject: [PATCH 4/6] Fix Python lint issues Sort hook exports and wrap the long E2E prompt string. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/copilot/__init__.py | 2 +- python/e2e/test_hooks_extended_e2e.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/copilot/__init__.py b/python/copilot/__init__.py index 53c8c5e34..2b9090f74 100644 --- a/python/copilot/__init__.py +++ b/python/copilot/__init__.py @@ -90,10 +90,10 @@ PermissionHandler, PermissionNoResult, PermissionRequestResult, - PostToolUseHandler, PostToolUseFailureHandler, PostToolUseFailureHookInput, PostToolUseFailureHookOutput, + PostToolUseHandler, PostToolUseHookInput, PostToolUseHookOutput, PreMcpToolCallHandler, diff --git a/python/e2e/test_hooks_extended_e2e.py b/python/e2e/test_hooks_extended_e2e.py index 4b43f0993..a0216e47f 100644 --- a/python/e2e/test_hooks_extended_e2e.py +++ b/python/e2e/test_hooks_extended_e2e.py @@ -211,7 +211,8 @@ async def on_post_tool_use_failure(input_data, invocation): ) try: response = await session.send_and_wait( - "Call the view tool with path 'missing.txt'. If it fails, use the hook guidance to answer." + "Call the view tool with path 'missing.txt'. " + "If it fails, use the hook guidance to answer." ) assert not post_tool_use_inputs assert len(failure_inputs) == 1 From 86a638bd48c21abeddf9c10e0f6a7725e546c24e Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 25 May 2026 16:40:29 -0400 Subject: [PATCH 5/6] Fix failure hook snapshot text Match the bundled runtime's postToolUseFailure guidance label in the shared E2E snapshot. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ...d_invoke_posttoolusefailure_hook_for_failed_tool_result.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/snapshots/hooks_extended/should_invoke_posttoolusefailure_hook_for_failed_tool_result.yaml b/test/snapshots/hooks_extended/should_invoke_posttoolusefailure_hook_for_failed_tool_result.yaml index ba83e0f7a..bbe815735 100644 --- a/test/snapshots/hooks_extended/should_invoke_posttoolusefailure_hook_for_failed_tool_result.yaml +++ b/test/snapshots/hooks_extended/should_invoke_posttoolusefailure_hook_for_failed_tool_result.yaml @@ -18,7 +18,7 @@ conversations: content: Tool 'view' does not exist. Available tools that can be called are report_intent. - role: user content: |- - Tool "view" failed. Additional guidance from postToolUseFailure hook: + Tool "view" failed. Additional guidance from postToolUseFailure hooks: HOOK_FAILURE_GUIDANCE_APPLIED - role: assistant content: HOOK_FAILURE_GUIDANCE_APPLIED From a91427a1845e1d8243796275366430a012e098a2 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 25 May 2026 17:01:38 -0400 Subject: [PATCH 6/6] Fix Rust hook E2E helper Make the Rust E2E RecordingHooks failure hook a no-op unless the failure hook is explicitly configured. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/tests/e2e/hooks_extended.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rust/tests/e2e/hooks_extended.rs b/rust/tests/e2e/hooks_extended.rs index de71cf055..d4b6b0a55 100644 --- a/rust/tests/e2e/hooks_extended.rs +++ b/rust/tests/e2e/hooks_extended.rs @@ -608,10 +608,11 @@ impl SessionHooks for RecordingHooks { assert!(!ctx.session_id.as_str().is_empty()); if let Some(tx) = &self.post_tool_failure { let _ = tx.send(input); + return Some(PostToolUseFailureOutput { + additional_context: Some("HOOK_FAILURE_GUIDANCE_APPLIED".to_string()), + }); } - Some(PostToolUseFailureOutput { - additional_context: Some("HOOK_FAILURE_GUIDANCE_APPLIED".to_string()), - }) + None } }