cloudflare · threepointone · Jun 1, 2026 · May 31, 2026
diff --git a/.changeset/chat-recovery-preserve-settled-work.md b/.changeset/chat-recovery-preserve-settled-work.md
@@ -0,0 +1,26 @@
+---
+"@cloudflare/think": patch
+"@cloudflare/ai-chat": patch
+---
+
+Stop chat recovery from discarding settled work when it gives up on a turn
+(#1631).
+
+Two paths could throw away a partial assistant message containing completed,
+often non-idempotent tool results:
+
+- When the framework's own recovery budget was exhausted, `_exhaustChatRecovery`
+  sealed the turn (terminal status + banner) **before** the orphaned stream was
+  ever persisted — so all settled tool results the turn had produced were lost
+  and the model re-ran them on the next message. Exhaustion now persists the
+  settled partial first, using the same gating as the normal recovery path so it
+  can't duplicate an already-saved partial.
+- A subclass `onChatRecovery` returning `{ persist: false }` to stop a turn used
+  to silently drop the settled partial. Settled work is now **never** dropped:
+  `persist: false` only suppresses persistence of a partial that has nothing
+  settled to lose; a partial carrying settled tool results is persisted
+  regardless. An app can no longer accidentally discard completed work — and it
+  never needs `{ persist: true }` just to stay safe. (A safe default beats a
+  warning about an unsafe one.)
+
+Applied identically to `@cloudflare/think` and `@cloudflare/ai-chat`.
diff --git a/packages/ai-chat/src/index.ts b/packages/ai-chat/src/index.ts
@@ -3227,6 +3227,26 @@ export class AIChatAgent<
     await this._bumpChatRecoveryProgress();
   }
 
+  /** True when any `tool-*` / `dynamic-tool` part of a reconstructed partial is
+   *  settled: it has an `output`/`result` key, or its `state` is
+   *  `output-available` / `output-error` / `output-denied` — what
+   *  `convertToModelMessages` treats as settled. That is the completed, often
+   *  non-idempotent work a `{ persist: false }` recovery return must not drop. */
+  private _partialHasSettledToolResults(parts: MessagePart[]): boolean {
+    return parts.some((part) => {
+      const record = part as Record<string, unknown>;
+      const type = typeof record.type === "string" ? record.type : "";
+      if (!(type.startsWith("tool-") || type === "dynamic-tool")) return false;
+      if ("output" in record || "result" in record) return true;
+      const state = typeof record.state === "string" ? record.state : "";
+      return (
+        state === "output-available" ||
+        state === "output-error" ||
+        state === "output-denied"
+      );
+    });
+  }
+
   /** Sweep recovery incidents that have been inactive past the TTL. */
   private async _sweepStaleChatRecoveryIncidents(now: number): Promise<void> {
     const entries = await this.ctx.storage.list<ChatRecoveryIncident>({
@@ -3458,6 +3478,15 @@ export class AIChatAgent<
       ? this._getPartialStreamText(streamId)
       : { text: "", parts: [] as MessagePart[] };
 
+    // Only persist while the stream is still active. The ACK handler (client
+    // reconnect → replayChunks) may have already persisted + completed the
+    // orphaned stream before fiber recovery runs; persisting again on the same
+    // chunks would double the assistant message's parts.
+    const streamStillActive =
+      streamId &&
+      this._resumableStream.hasActiveStream() &&
+      this._resumableStream.activeStreamId === streamId;
+
     const shouldRetryPreStream = this._shouldRetryRecoveredPreStreamTurn(
       recoverySnapshot,
       streamId ?? "",
@@ -3477,6 +3506,13 @@ export class AIChatAgent<
       });
 
     if (exhausted) {
+      // Preserve the settled partial before sealing the turn. Exhaustion is
+      // decided BEFORE `onChatRecovery` is consulted, so without this the
+      // settled (often non-idempotent) tool results the turn already produced
+      // are discarded and the model re-runs them on the next message (#1631).
+      if (streamStillActive) {
+        await this._persistOrphanedStream(streamId);
+      }
       await this._exhaustChatRecovery(incident, config);
       return true;
     }
@@ -3504,17 +3540,17 @@ export class AIChatAgent<
           createdAt: ctx.createdAt
         })) ?? {};
 
-      // Only persist and complete if the stream is still active. The ACK
-      // handler (client reconnect → replayChunks) may have already persisted
-      // the orphaned stream and completed it before fiber recovery runs.
-      // Without this guard, _persistOrphanedStream runs twice on the same
-      // chunks, doubling the assistant message's parts.
-      const streamStillActive =
-        streamId &&
-        this._resumableStream.hasActiveStream() &&
-        this._resumableStream.activeStreamId === streamId;
-
-      if (options.persist !== false && streamStillActive) {
+      // Settled work — completed, often non-idempotent tool results — is NEVER
+      // dropped by recovery. `persist: false` only suppresses persistence of a
+      // partial that has nothing settled to lose; a partial carrying settled
+      // tool results is persisted regardless, so an app can never accidentally
+      // discard completed work (and never needs `{ persist: true }` just to be
+      // safe). A safe default beats a warning about an unsafe one (#1631).
+      if (
+        streamStillActive &&
+        (options.persist !== false ||
+          this._partialHasSettledToolResults(partial.parts))
+      ) {
         await this._persistOrphanedStream(streamId);
       }
 

diff --git a/packages/ai-chat/src/tests/chat-recovery.test.ts b/packages/ai-chat/src/tests/chat-recovery.test.ts
@@ -1,5 +1,5 @@
 import { env } from "cloudflare:workers";
-import { describe, expect, it } from "vitest";
+import { describe, expect, it, vi } from "vitest";
 import { getAgentByName } from "agents";
 import type { UIMessage as ChatMessage } from "ai";
 import { connectChatWS, isUseChatResponseMessage } from "./test-utils";
@@ -37,6 +37,21 @@ interface ChatTestStub {
   ): Promise<void>;
   insertInterruptedFiber(name: string, snapshot?: unknown): Promise<void>;
   triggerFiberRecovery(): Promise<void>;
+  setChatRecoveryConfigForTest(config: {
+    maxAttempts?: number;
+    terminalMessage?: string;
+  }): Promise<void>;
+  seedIncidentForTest(incident: {
+    incidentId: string;
+    requestId: string;
+    recoveryKind: "retry" | "continue";
+    attempt: number;
+    maxAttempts: number;
+    status: string;
+    firstSeenAt: number;
+    lastAttemptAt: number;
+  }): Promise<void>;
+  getChatRecoveryIncidentsForTest(): Promise<Array<{ status: string }>>;
 }
 
 interface SlowStreamStub {
@@ -549,6 +564,182 @@ describe("chatRecovery", () => {
     });
   });
 
+  describe("recovery preserves settled work (#1631)", () => {
+    it("persists the settled partial when the recovery budget is exhausted", async () => {
+      const room = crypto.randomUUID();
+      const stub = (await getAgentByName(
+        env.ChatRecoveryTestAgent,
+        room
+      )) as unknown as ChatTestStub;
+      // maxAttempts: 1 so a seeded attempt at the cap exhausts on the next wake.
+      await stub.setChatRecoveryConfigForTest({ maxAttempts: 1 });
+
+      // A settled tool call (input + output chunks) followed by text — the work
+      // the budget-exhaustion path used to discard, making the model re-run it.
+      await stub.insertInterruptedStream("stream-exh", "req-exh", [
+        {
+          body: JSON.stringify({ type: "start", messageId: "a-exh" }),
+          index: 0
+        },
+        {
+          body: JSON.stringify({
+            type: "tool-input-available",
+            toolCallId: "tc-exh",
+            toolName: "writeFile",
+            input: { path: "out.txt" }
+          }),
+          index: 1
+        },
+        {
+          body: JSON.stringify({
+            type: "tool-output-available",
+            toolCallId: "tc-exh",
+            output: { bytesWritten: 12 }
+          }),
+          index: 2
+        },
+        { body: JSON.stringify({ type: "text-start" }), index: 3 },
+        {
+          body: JSON.stringify({ type: "text-delta", delta: "did real work" }),
+          index: 4
+        }
+      ]);
+      await stub.insertInterruptedFiber("__cf_internal_chat_turn:req-exh");
+      // Seed an incident already at the cap so this recovery exhausts.
+      // `lastAttemptAt` is aged past the alarm-debounce window (#1637/#1638) so
+      // this wake counts as a genuine new attempt (1 → 2 > maxAttempts) rather
+      // than being collapsed as a debounced reconnect (which would hold the
+      // attempt at 1 and never exhaust).
+      await stub.seedIncidentForTest({
+        incidentId: "req-exh:",
+        requestId: "req-exh",
+        recoveryKind: "continue",
+        attempt: 1,
+        maxAttempts: 1,
+        status: "scheduled",
+        firstSeenAt: Date.now() - 60_000,
+        lastAttemptAt: Date.now() - 60_000
+      });
+
+      await stub.triggerFiberRecovery();
+
+      // Exhaustion seals the turn but must NOT discard the settled partial.
+      const messages = await stub.getPersistedMessages();
+      const assistantMsgs = messages.filter((m) => m.role === "assistant");
+      expect(assistantMsgs).toHaveLength(1);
+      expect(extractAssistantText(messages)).toContain("did real work");
+      // The settled tool result is preserved (not just the text).
+      const settledTool = assistantMsgs[0]?.parts?.find((p) => {
+        const part = p as { type?: unknown; output?: unknown; state?: unknown };
+        return (
+          typeof part.type === "string" &&
+          part.type.startsWith("tool-") &&
+          (part.output !== undefined || part.state === "output-available")
+        );
+      });
+      expect(settledTool).toBeDefined();
+
+      const incidents = await stub.getChatRecoveryIncidentsForTest();
+      expect(incidents[0]?.status).toBe("exhausted");
+    });
+
+    it("never drops settled tool results on { persist: false } — preserves them anyway", async () => {
+      const room = crypto.randomUUID();
+      const stub = (await getAgentByName(
+        env.ChatRecoveryTestAgent,
+        room
+      )) as unknown as ChatTestStub;
+      await stub.setRecoveryOverride({ persist: false, continue: false });
+
+      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+      try {
+        await stub.insertInterruptedStream("stream-settled", "req-settled", [
+          {
+            body: JSON.stringify({ type: "start", messageId: "a-settled" }),
+            index: 0
+          },
+          {
+            body: JSON.stringify({
+              type: "tool-input-available",
+              toolCallId: "tc1",
+              toolName: "calc",
+              input: { x: 1 }
+            }),
+            index: 1
+          },
+          {
+            body: JSON.stringify({
+              type: "tool-output-available",
+              toolCallId: "tc1",
+              output: { result: 42 }
+            }),
+            index: 2
+          }
+        ]);
+        await stub.insertInterruptedFiber(
+          "__cf_internal_chat_turn:req-settled"
+        );
+
+        await stub.triggerFiberRecovery();
+
+        // Settled work is preserved regardless of `persist: false`: the assistant
+        // partial with the completed tool call IS persisted, and no warning is
+        // logged. NOTE(review): the check below matches any `tool-*` part type.
+        const messages = await stub.getPersistedMessages();
+        const assistantMsgs = messages.filter((m) => m.role === "assistant");
+        expect(assistantMsgs).toHaveLength(1);
+        const hasSettledTool = assistantMsgs[0]?.parts?.some((p) => {
+          const type = (p as { type?: unknown }).type;
+          return typeof type === "string" && type.startsWith("tool-");
+        });
+        expect(hasSettledTool).toBe(true);
+        expect(warnSpy).not.toHaveBeenCalled();
+      } finally {
+        warnSpy.mockRestore();
+      }
+    });
+
+    it("honors { persist: false } for a text-only partial with no settled work", async () => {
+      const room = crypto.randomUUID();
+      const stub = (await getAgentByName(
+        env.ChatRecoveryTestAgent,
+        room
+      )) as unknown as ChatTestStub;
+      await stub.setRecoveryOverride({ persist: false, continue: false });
+
+      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+      try {
+        await stub.insertInterruptedStream("stream-textonly", "req-textonly", [
+          {
+            body: JSON.stringify({ type: "start", messageId: "a-textonly" }),
+            index: 0
+          },
+          { body: JSON.stringify({ type: "text-start" }), index: 1 },
+          {
+            body: JSON.stringify({
+              type: "text-delta",
+              delta: "just prose, no tools"
+            }),
+            index: 2
+          }
+        ]);
+        await stub.insertInterruptedFiber(
+          "__cf_internal_chat_turn:req-textonly"
+        );
+
+        await stub.triggerFiberRecovery();
+
+        // Nothing settled to preserve, so `persist: false` is honored: no
+        // assistant message is persisted, and the `console.warn` spy stays silent.
+        const messages = await stub.getPersistedMessages();
+        expect(messages.filter((m) => m.role === "assistant")).toHaveLength(0);
+        expect(warnSpy).not.toHaveBeenCalled();
+      } finally {
+        warnSpy.mockRestore();
+      }
+    });
+  });
+
   describe("programmatic turn with chatRecovery=true", () => {
     it("wraps saveMessages-triggered turn in a fiber and cleans up", async () => {
       const room = crypto.randomUUID();