Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 157 additions & 80 deletions .github/workflows/pr-description-caveman.lock.yml

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions actions/setup/js/claude_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ const OVERLOADED_ERROR_PATTERN = /overloaded_error|"overloaded"/i;
// - embedded stream-json result fields (e.g. "api_error_status":429)
// - human-readable message text ("rate limit")
const RATE_LIMIT_ERROR_PATTERN = /rate_limit_error|429 Too Many Requests|"api_error_status"\s*:\s*429|request rejected \(429\)|rate limit/i;
// Pattern to detect an authentication failure in the collected process output.
// Matches the text "Authentication failed" case-insensitively; the optional group
// additionally consumes a trailing "(Request ID: ...)" suffix when one is present,
// e.g. "Authentication failed (Request ID: C818:3ED713:19D401B:1C446B7:69D653CA)".
const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;

// Pattern to detect a clean max-turns exit from Claude Code.
// Claude Code emits a JSON result object with "subtype":"error_max_turns" when the
Expand Down Expand Up @@ -112,6 +113,15 @@ function isRateLimitError(output) {
return RATE_LIMIT_ERROR_PATTERN.test(output);
}

/**
 * Reports whether the process output contains an "Authentication failed" message.
 * @param {string} output - Collected stdout+stderr from the process
 * @returns {boolean} true when the output matches AUTHENTICATION_FAILED_PATTERN
 */
function isAuthenticationFailedError(output) {
  // The pattern has no global/sticky flag, so exec() is stateless here.
  const authFailureMatch = AUTHENTICATION_FAILED_PATTERN.exec(output);
  return authFailureMatch !== null;
}

/**
* Determines if the collected output signals a clean max-turns exit.
* When Claude Code hits its turn limit it emits a result object with
Expand Down Expand Up @@ -441,6 +451,7 @@ async function main() {

const isOverloaded = isOverloadedError(result.output);
const isRateLimit = isRateLimitError(result.output);
const isAuthenticationFailed = isAuthenticationFailedError(result.output);
const isMaxTurns = isMaxTurnsExit(result.output);
const isNoDeferredMarker = isNoDeferredMarkerError(result.output);
const permissionDeniedCount = countPermissionDeniedIssues(result.output);
Expand All @@ -450,6 +461,7 @@ async function main() {
` exitCode=${result.exitCode}` +
` isOverloadedError=${isOverloaded}` +
` isRateLimitError=${isRateLimit}` +
` isAuthenticationFailedError=${isAuthenticationFailed}` +
` isMaxTurnsExit=${isMaxTurns}` +
` isNoDeferredMarkerError=${isNoDeferredMarker}` +
` permissionDeniedCount=${permissionDeniedCount}` +
Expand All @@ -458,6 +470,11 @@ async function main() {
` retriesRemaining=${MAX_RETRIES - attempt}`
);

if (attempt === 0 && isAuthenticationFailed) {
log(`attempt ${attempt + 1}: authentication failed — not retrying (first-attempt auth failure is non-retryable)`);
break;
}

if (hasNumerousPermissionDenied) {
const deniedCommands = extractDeniedCommands(result.output);
emitMissingToolPermissionIssue({ deniedCommands });
Expand Down Expand Up @@ -538,6 +555,7 @@ if (typeof module !== "undefined" && module.exports) {
resolveClaudePromptFileArgs,
stripPromptFileArgs,
isRateLimitError,
isAuthenticationFailedError,
isMaxTurnsExit,
isNoDeferredMarkerError,
isSignalTerminationExitCode,
Expand Down
32 changes: 32 additions & 0 deletions actions/setup/js/claude_harness.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const {
resolveClaudePromptFileArgs,
stripPromptFileArgs,
isRateLimitError,
isAuthenticationFailedError,
isMaxTurnsExit,
isNoDeferredMarkerError,
isSignalTerminationExitCode,
Expand Down Expand Up @@ -181,6 +182,17 @@ describe("claude_harness.cjs", () => {
});
});

describe("isAuthenticationFailedError", () => {
  it("returns true for authentication failed with request id", () => {
    expect(isAuthenticationFailedError("Authentication failed (Request ID: C818:3ED713:19D401B:1C446B7:69D653CA)")).toBe(true);
  });

  // The "(Request ID: ...)" suffix is an optional group in the pattern, so the bare message must match too.
  it("returns true for authentication failed without request id", () => {
    expect(isAuthenticationFailedError("Authentication failed")).toBe(true);
  });

  // The pattern carries the `i` flag.
  it("matches regardless of letter case", () => {
    expect(isAuthenticationFailedError("authentication failed (request id: ABC123)")).toBe(true);
  });

  // The harness passes the whole collected stdout+stderr, so the message is rarely alone.
  it("returns true when the message is embedded in surrounding output", () => {
    const output = ["starting run", "Error: Authentication failed (Request ID: ABC123)", "exiting"].join("\n");
    expect(isAuthenticationFailedError(output)).toBe(true);
  });

  it("returns false for unrelated output", () => {
    expect(isAuthenticationFailedError("No authentication information found")).toBe(false);
    expect(isAuthenticationFailedError("rate_limit_error")).toBe(false);
    expect(isAuthenticationFailedError("")).toBe(false);
  });
});

describe("isNoDeferredMarkerError", () => {
it("returns true for the canonical no-deferred-marker error message", () => {
const output =
Expand Down Expand Up @@ -437,4 +449,24 @@ process.exit(0);
expect(result).toBe(false);
});
});

describe("auth failure retry policy", () => {
  const MAX_RETRIES = 3;

  /**
   * Simplified inline model of the retry decision used by these tests.
   * The authentication guard applies to the first attempt only (attempt === 0).
   * @param {{hasOutput: boolean, exitCode: number, output: string}} result
   * @param {number} attempt - Zero-based attempt index
   * @returns {boolean} true when another attempt should be made
   */
  function shouldRetry(result, attempt) {
    if (result.exitCode === 0) return false;
    if (attempt === 0 && isAuthenticationFailedError(result.output)) return false;
    return attempt < MAX_RETRIES && result.hasOutput;
  }

  it("does not retry when first attempt fails authentication", () => {
    const result = { exitCode: 1, hasOutput: true, output: "Authentication failed (Request ID: 123)" };
    expect(shouldRetry(result, 0)).toBe(false);
  });

  // Pins the attempt === 0 boundary: the guard does not short-circuit later attempts.
  it("does not apply the authentication guard after the first attempt", () => {
    const result = { exitCode: 1, hasOutput: true, output: "Authentication failed (Request ID: 123)" };
    expect(shouldRetry(result, 1)).toBe(true);
  });

  it("still retries a first-attempt failure that is not an authentication failure", () => {
    const result = { exitCode: 1, hasOutput: true, output: "some other error" };
    expect(shouldRetry(result, 0)).toBe(true);
  });

  it("does not retry after a successful exit", () => {
    const result = { exitCode: 0, hasOutput: true, output: "Authentication failed (Request ID: 123)" };
    expect(shouldRetry(result, 0)).toBe(false);
  });
});
});
18 changes: 18 additions & 0 deletions actions/setup/js/codex_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ const MAX_DELAY_MS = 60000;
// Matches "rate_limit_exceeded" from the OpenAI error type field and the "429" status code
// that Codex emits when the API rate limit is hit.
const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i;
// Pattern to detect an authentication failure in the collected process output.
// Matches the text "Authentication failed" case-insensitively; the optional group
// additionally consumes a trailing "(Request ID: ...)" suffix when one is present.
const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;

// Pattern to detect OpenAI server-side errors (HTTP 500, 503).
// These are transient infrastructure failures that may resolve on retry.
Expand Down Expand Up @@ -85,6 +86,15 @@ function isRateLimitError(output) {
return RATE_LIMIT_ERROR_PATTERN.test(output);
}

/**
 * Reports whether the process output contains an "Authentication failed" message.
 * @param {string} output - Collected stdout+stderr from the process
 * @returns {boolean} true when the output matches AUTHENTICATION_FAILED_PATTERN
 */
function isAuthenticationFailedError(output) {
  // The pattern has no global/sticky flag, so exec() is stateless here.
  const authFailureMatch = AUTHENTICATION_FAILED_PATTERN.exec(output);
  return authFailureMatch !== null;
}

/**
* Determines if the collected output contains an OpenAI server error.
* @param {string} output - Collected stdout+stderr from the process
Expand Down Expand Up @@ -297,20 +307,27 @@ async function main() {
}

const isRateLimit = isRateLimitError(result.output);
const isAuthenticationFailed = isAuthenticationFailedError(result.output);
const isServer = isServerError(result.output);
const permissionDeniedCount = countPermissionDeniedIssues(result.output);
const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output);
log(
`attempt ${attempt + 1} failed:` +
` exitCode=${result.exitCode}` +
` isRateLimitError=${isRateLimit}` +
` isAuthenticationFailedError=${isAuthenticationFailed}` +
` isServerError=${isServer}` +
` permissionDeniedCount=${permissionDeniedCount}` +
` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` +
` hasOutput=${result.hasOutput}` +
` retriesRemaining=${MAX_RETRIES - attempt}`
);

if (attempt === 0 && isAuthenticationFailed) {
log(`attempt ${attempt + 1}: authentication failed — not retrying (first-attempt auth failure is non-retryable)`);
break;
}

if (hasNumerousPermissionDenied) {
const deniedCommands = extractDeniedCommands(result.output);
emitMissingToolPermissionIssue({ deniedCommands });
Expand Down Expand Up @@ -347,6 +364,7 @@ if (typeof module !== "undefined" && module.exports) {
module.exports = {
resolveCodexPromptFileArgs,
isRateLimitError,
isAuthenticationFailedError,
isServerError,
countPermissionDeniedIssues,
hasNumerousPermissionDeniedIssues,
Expand Down
28 changes: 27 additions & 1 deletion actions/setup/js/codex_harness.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,16 @@ import os from "os";
import path from "path";

const require = createRequire(import.meta.url);
const { resolveCodexPromptFileArgs, isRateLimitError, isServerError, countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./codex_harness.cjs");
const {
resolveCodexPromptFileArgs,
isRateLimitError,
isAuthenticationFailedError,
isServerError,
countPermissionDeniedIssues,
hasNumerousPermissionDeniedIssues,
extractDeniedCommands,
buildMissingToolPermissionIssuePayload,
} = require("./codex_harness.cjs");

describe("codex_harness.cjs", () => {
describe("resolveCodexPromptFileArgs", () => {
Expand Down Expand Up @@ -81,6 +90,17 @@ describe("codex_harness.cjs", () => {
});
});

describe("isAuthenticationFailedError", () => {
  it("returns true for authentication failed with request id", () => {
    expect(isAuthenticationFailedError("Authentication failed (Request ID: C818:3ED713:19D401B:1C446B7:69D653CA)")).toBe(true);
  });

  // The "(Request ID: ...)" suffix is an optional group in the pattern, so the bare message must match too.
  it("returns true for authentication failed without request id", () => {
    expect(isAuthenticationFailedError("Authentication failed")).toBe(true);
  });

  // The pattern carries the `i` flag.
  it("matches regardless of letter case", () => {
    expect(isAuthenticationFailedError("authentication failed (request id: ABC123)")).toBe(true);
  });

  it("returns false for non-authentication-failed output", () => {
    expect(isAuthenticationFailedError("No authentication information found")).toBe(false);
    expect(isAuthenticationFailedError("rate_limit_exceeded")).toBe(false);
    expect(isAuthenticationFailedError("")).toBe(false);
  });
});

describe("isServerError", () => {
it("returns true for InternalServerError", () => {
expect(isServerError("InternalServerError: The server had an error processing your request")).toBe(true);
Expand Down Expand Up @@ -202,6 +222,7 @@ describe("codex_harness.cjs", () => {
if (result.exitCode === 0) return false;
const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i;
const SERVER_ERROR_PATTERN = /InternalServerError|ServiceUnavailableError|500 Internal Server Error|503 Service Unavailable/i;
if (attempt === 0 && isAuthenticationFailedError(result.output)) return false;
if (hasNumerousPermissionDeniedIssues(result.output)) return false;
const isTransient = RATE_LIMIT_ERROR_PATTERN.test(result.output) || SERVER_ERROR_PATTERN.test(result.output);
return attempt < MAX_RETRIES && (result.hasOutput || isTransient);
Expand All @@ -222,6 +243,11 @@ describe("codex_harness.cjs", () => {
expect(shouldRetry(result, 0)).toBe(true);
});

// A first-attempt (attempt index 0) authentication failure must not be retried.
it("does not retry when first attempt fails authentication", () => {
  const authFailure = {
    exitCode: 1,
    hasOutput: true,
    output: "Authentication failed (Request ID: ABC123)",
  };
  const retryDecision = shouldRetry(authFailure, 0);
  expect(retryDecision).toBe(false);
});

it("does not retry when no output was produced and no transient error", () => {
const result = { exitCode: 1, hasOutput: false, output: "" };
expect(shouldRetry(result, 0)).toBe(false);
Expand Down
21 changes: 21 additions & 0 deletions actions/setup/js/copilot_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ const MODEL_NOT_SUPPORTED_PATTERN = /The requested model is not supported/;
// case the driver falls back to a fresh run (without --continue) to re-do env-var auth.
// On a fresh run the token is genuinely absent — retrying will not help.
const NO_AUTH_INFO_PATTERN = /No authentication information found/;
// Pattern to detect authentication failures returned by the Copilot API.
// Matches the text "Authentication failed" case-insensitively; the optional group
// additionally consumes a trailing "(Request ID: ...)" suffix when one is present.
// After a first-attempt auth failure, retrying is futile because the entrypoint unsets
// COPILOT_GITHUB_TOKEN between attempts.
const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;
Comment on lines +90 to +92

// Pattern to detect null-type tool_call error that poisons conversation history.
// Matches the Copilot API 400 error:
Expand Down Expand Up @@ -153,6 +157,15 @@ function isNoAuthInfoError(output) {
return NO_AUTH_INFO_PATTERN.test(output);
}

/**
 * Reports whether the process output contains an "Authentication failed" message.
 * @param {string} output - Collected stdout+stderr from the process
 * @returns {boolean} true when the output matches AUTHENTICATION_FAILED_PATTERN
 */
function isAuthenticationFailedError(output) {
  // The pattern has no global/sticky flag, so exec() is stateless here.
  const authFailureMatch = AUTHENTICATION_FAILED_PATTERN.exec(output);
  return authFailureMatch !== null;
}

/**
* Determines if the collected output contains a null-type tool_call error.
* This error occurs when the model emits a malformed tool call with type: null.
Expand Down Expand Up @@ -462,6 +475,7 @@ async function main() {
const isMCPPolicy = isMCPPolicyError(result.output);
const isModelNotSupported = isModelNotSupportedError(result.output);
const isAuthErr = isNoAuthInfoError(result.output);
const isAuthenticationFailed = isAuthenticationFailedError(result.output);
const isNullTypeToolCall = isNullTypeToolCallError(result.output);
const permissionDeniedCount = countPermissionDeniedIssues(result.output);
const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output);
Expand All @@ -473,12 +487,18 @@ async function main() {
` isModelNotSupportedError=${isModelNotSupported}` +
` isNullTypeToolCallError=${isNullTypeToolCall}` +
` isAuthError=${isAuthErr}` +
` isAuthenticationFailedError=${isAuthenticationFailed}` +
` permissionDeniedCount=${permissionDeniedCount}` +
` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` +
` hasOutput=${result.hasOutput}` +
` retriesRemaining=${MAX_RETRIES - attempt}`
);

if (attempt === 0 && isAuthenticationFailed) {
log(`attempt ${attempt + 1}: authentication failed — not retrying (first-attempt auth failure is non-retryable)`);
break;
}

if (hasNumerousPermissionDenied) {
const deniedCommands = extractDeniedCommands(result.output);
emitMissingToolPermissionIssue({ deniedCommands });
Expand Down Expand Up @@ -596,6 +616,7 @@ if (typeof module !== "undefined" && module.exports) {
countPermissionDeniedIssues,
hasNumerousPermissionDeniedIssues,
buildMissingToolPermissionIssuePayload,
isAuthenticationFailedError,
resolvePromptFileArgs,
};
}
Expand Down
17 changes: 17 additions & 0 deletions actions/setup/js/copilot_harness.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const {
emitInfrastructureIncomplete,
extractDeniedCommands,
hasNumerousPermissionDeniedIssues,
isAuthenticationFailedError,
enrichReflectModels,
extractModelIds,
fetchAWFReflect,
Expand Down Expand Up @@ -410,6 +411,16 @@ describe("copilot_harness.cjs", () => {
});
});

describe("authentication-failed detection pattern", () => {
  it("matches authentication failed with request id", () => {
    expect(isAuthenticationFailedError("Authentication failed (Request ID: C818:3ED713:19D401B:1C446B7:69D653CA)")).toBe(true);
  });

  // The "(Request ID: ...)" suffix is an optional group in the pattern, so the bare message must match too.
  it("matches authentication failed without request id", () => {
    expect(isAuthenticationFailedError("Authentication failed")).toBe(true);
  });

  // The pattern carries the `i` flag.
  it("matches regardless of letter case", () => {
    expect(isAuthenticationFailedError("authentication failed (request id: ABC123)")).toBe(true);
  });

  it("does not match no-auth-info error", () => {
    expect(isAuthenticationFailedError("Error: No authentication information found.")).toBe(false);
  });

  it("does not match empty output", () => {
    expect(isAuthenticationFailedError("")).toBe(false);
  });
});

describe("auth error prevents retry", () => {
// Inline the same retry logic as the driver, including auth error check
const MCP_POLICY_BLOCKED_PATTERN = /MCP servers were blocked by policy:/;
Expand All @@ -426,6 +437,7 @@ describe("copilot_harness.cjs", () => {
if (result.exitCode === 0) return false;
// MCP policy errors are persistent — never retry
if (MCP_POLICY_BLOCKED_PATTERN.test(result.output)) return false;
if (attempt === 0 && isAuthenticationFailedError(result.output)) return false;
// Auth error on --continue: fall back to fresh run once; on fresh run: bail
if (NO_AUTH_INFO_PATTERN.test(result.output)) {
return useContinueOnRetry && attempt < MAX_RETRIES;
Expand All @@ -438,6 +450,11 @@ describe("copilot_harness.cjs", () => {
expect(shouldRetry(result, 0, false)).toBe(false);
});

// A first-attempt (attempt index 0) fresh run that reports "Authentication failed" must not be retried.
it("does not retry when first attempt reports authentication failed", () => {
  const authFailure = {
    exitCode: 1,
    hasOutput: true,
    output: "Authentication failed (Request ID: ABC123)",
  };
  const retryDecision = shouldRetry(authFailure, 0, false);
  expect(retryDecision).toBe(false);
});

it("retries as fresh run when auth fails on a --continue attempt", () => {
// This replicates the fix: attempt 1 ran for 3+ min then failed mid-stream,
// attempt 2 (--continue) fails with auth error — driver retries once as fresh run.
Expand Down