github · pelikhan · May 23, 2026 · May 23, 2026
diff --git a/setup/js/handle_agent_failure.cjs b/setup/js/handle_agent_failure.cjs
@@ -19,6 +19,14 @@ const fs = require("fs");
 const path = require("path");
 
 const DEFAULT_ACTION_FAILURE_ISSUE_EXPIRES_HOURS = 24 * 7;
+const DEFAULT_OTEL_JSONL_PATH = "/tmp/gh-aw/otel.jsonl";
+// Engine-side 429/rate-limit signatures:
+// - HTTP 429 accompanied by "too many requests"/"rate limit" phrasing
+// - provider error codes like rate_limit_error / rate_limit_exceeded
+// - Copilot/CAPI "CAPIError: 429" and utility-model quota text
+// - retry wrapper text that includes the canonical "Failed to get response..." phrase
+const ENGINE_RATE_LIMIT_429_RE =
+  /(?:\b429\b[\s\S]{0,120}(?:too many requests|rate[\s-]*limit)|rate_limit_(?:error|exceeded)|capierror:\s*429|failed to get response from the ai model[\s\S]{0,120}\b429\b|exceeded your rate limit for utility models)/i;
 
 /**
  * Parse action failure issue expiration from environment.
@@ -1093,6 +1101,55 @@ function buildModelNotSupportedErrorContext(hasModelNotSupportedError) {
   }
 }
 
+/**
+ * Detect HTTP 429/rate-limit engine failures in text payloads.
+ * @param {string} content
+ * @returns {boolean}
+ */
+function hasEngineRateLimit429Signal(content) {
+  if (!content) {
+    return false;
+  }
+  return ENGINE_RATE_LIMIT_429_RE.test(content);
+}
+
+/**
+ * Detect HTTP 429/rate-limit engine failures from OTLP JSONL mirror payloads.
+ * @param {string} [otelJsonlPathOverride]
+ * @returns {boolean}
+ */
+function hasEngineRateLimit429InOTELMirror(otelJsonlPathOverride) {
+  const otelJsonlPath = otelJsonlPathOverride || process.env.GH_AW_OTEL_JSONL_PATH || DEFAULT_OTEL_JSONL_PATH;
+  try {
+    if (!fs.existsSync(otelJsonlPath)) {
+      return false;
+    }
+    const content = fs.readFileSync(otelJsonlPath, "utf8");
+    return hasEngineRateLimit429Signal(content);
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Build dedicated context for engine 429/rate-limit failures.
+ * @param {string} engineLabel
+ * @returns {string}
+ */
+function buildEngineRateLimit429Context(engineLabel) {
+  const normalizedEngineLabel = engineLabel.trim() || "AI";
+  const templatePath = getPromptPath("engine_rate_limit_429.md");
+  try {
+    return "\n" + renderTemplateFromFile(templatePath, { engine_label: normalizedEngineLabel });
+  } catch {
+    return (
+      `\n**🚦 Engine Rate Limited (HTTP 429)**: The ${normalizedEngineLabel} engine hit provider rate limits and could not complete this run.\n\n` +
+      "This signal was detected from engine runtime logs/OTLP telemetry.\n\n" +
+      "Retry after a short delay. If this recurs, reduce concurrent runs or review provider quota/rate-limit policies.\n"
+    );
+  }
+}
+
 /**
  * Read and render token usage from token-usage.jsonl for inclusion in the ET computation table.
  * Returns null gracefully when files are absent, empty, or unparseable.
@@ -1490,6 +1547,13 @@ function buildEngineFailureContext() {
       return "";
     }
 
+    // Special handling for provider-side 429/rate-limit failures. These can appear
+    // in agent stdio output or only in mirrored OTLP telemetry payloads.
+    if (hasEngineRateLimit429Signal(logContent) || hasEngineRateLimit429InOTELMirror()) {
+      core.info("Detected engine HTTP 429/rate-limit signal — using dedicated context message");
+      return buildEngineRateLimit429Context(engineLabel);
+    }
+
     const errorMessages = new Set();
 
     for (const line of lines) {
@@ -2581,6 +2645,9 @@ module.exports = {
   buildPermissionDeniedContext,
   buildCredentialAuthErrorContext,
   buildEffectiveTokensRateLimitErrorContext,
+  hasEngineRateLimit429Signal,
+  hasEngineRateLimit429InOTELMirror,
+  buildEngineRateLimit429Context,
   readTokenUsageMarkdown,
   parseFirewallAuthErrors,
   parseMaxEffectiveTokensFromAuditLog,

diff --git a/setup/js/parse_copilot_log.cjs b/setup/js/parse_copilot_log.cjs
@@ -292,6 +292,16 @@ function parsePrettyPrintFormat(logContent) {
         premiumRequests = parseFloat(premMatch[1]);
         hasPremiumModel = true;
       }
+      // Newer Copilot CLI footer: "Tokens    ↑ 163.9k • ↓ 567 • 149.2k (cached)"
+      // The arrow + (cached) form has no "Breakdown by AI model" section, so this
+      // is the only place token totals appear. Capture them when present so they
+      // surface in the Information section.
+      const tokenMatch = trimmed.match(/^Tokens\s+↑\s*([\d.]+k?)\s*[•·]\s*↓\s*([\d.]+k?)(?:\s*[•·]\s*([\d.]+k?)\s*\(cached\))?/);
+      if (tokenMatch) {
+        if (inputTokens === 0) inputTokens = parseTokenCount(tokenMatch[1]);
+        if (outputTokens === 0) outputTokens = parseTokenCount(tokenMatch[2]);
+        if (tokenMatch[3] && cacheReadTokens === 0) cacheReadTokens = parseTokenCount(tokenMatch[3]);
+      }
       i++;
       continue;
     }

diff --git a/setup/md/engine_rate_limit_429.md b/setup/md/engine_rate_limit_429.md
@@ -0,0 +1,7 @@
+**🚦 Engine Rate Limited (HTTP 429)**: The {engine_label} engine hit provider rate limits and could not complete this run.
+
+This signal was detected from engine runtime logs/OTLP telemetry.
+
+**What to do next**
+- Retry the workflow after a short delay.
+- If this keeps happening, reduce concurrent workflow volume or review provider quota/rate-limit policies.