Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
13bedd1
🤖 fix: preserve backoff progression through manual retries
ammar-agent Nov 10, 2025
be78a6c
🤖 refactor: consolidate retry backoff logic
ammar-agent Nov 10, 2025
56fed70
🤖 refactor: simplify retry state tests
ammar-agent Nov 10, 2025
0c10865
🤖 feat: add debug flag to force all errors retryable
ammar-agent Nov 10, 2025
ae0ad32
🤖 docs: clarify debug flag requires manual retry to clear state
ammar-agent Nov 10, 2025
e27e355
🤖 debug: add console logging for retry eligibility
ammar-agent Nov 10, 2025
c1d7d2e
🤖 debug: switch to console.log for better visibility
ammar-agent Nov 10, 2025
165abec
🤖 refactor: make retry logs conditional on debug flag
ammar-agent Nov 10, 2025
0ac7357
🤖 fix: prevent backoff reset on stream start
ammar-agent Nov 10, 2025
ea12c69
🤖 debug: add targeted logging for attempt counter
ammar-agent Nov 10, 2025
d66b005
🤖 fix: configure HMR for custom host in dev-server
ammar-agent Nov 10, 2025
804315b
🤖 fix: reset retry state on stream-end not stream-start
ammar-agent Nov 10, 2025
a3b5635
🤖 fix: increment retry counter on stream-error event
ammar-agent Nov 10, 2025
a7b2f1e
🤖 fix: actually increment retry counter on stream-error
ammar-agent Nov 10, 2025
28b33b7
🤖 refactor: clean up retry logging and simplify code
ammar-agent Nov 10, 2025
aa11899
🤖 fix: remove unused imports
ammar-agent Nov 10, 2025
66d83ba
🤖 refactor: use callback pattern in updatePersistedState
ammar-agent Nov 10, 2025
62e957e
🤖 fix: guard window access in retry eligibility helpers
ammar-agent Nov 10, 2025
4672fa3
🤖 refactor: extract isForceAllRetryableEnabled helper (DRY)
ammar-agent Nov 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 11 additions & 16 deletions src/components/Messages/ChatBarrier/RetryBarrier.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import React, { useState, useEffect, useCallback, useMemo } from "react";
import React, { useState, useEffect, useMemo } from "react";
import { usePersistedState, updatePersistedState } from "@/hooks/usePersistedState";
import { getRetryStateKey, getAutoRetryKey } from "@/constants/storage";
import { CUSTOM_EVENTS, createCustomEvent } from "@/constants/events";
Expand All @@ -7,15 +7,13 @@ import type { RetryState } from "@/hooks/useResumeManager";
import { useWorkspaceState } from "@/stores/WorkspaceStore";
import { isEligibleForAutoRetry, isNonRetryableSendError } from "@/utils/messages/retryEligibility";
import { formatSendMessageError } from "@/utils/errors/formatSendError";
import { createManualRetryState, calculateBackoffDelay } from "@/utils/messages/retryState";

/** Props accepted by the RetryBarrier component. */
interface RetryBarrierProps {
// Workspace this barrier belongs to; used to build the persisted retry-state key
// (getRetryStateKey) that the manual retry handler writes to.
workspaceId: string;
// Optional CSS class name — presumably applied to the barrier's root element;
// TODO confirm against the render code, which is not visible here.
className?: string;
}

const INITIAL_DELAY = 1000; // 1 second
const MAX_DELAY = 60000; // 60 seconds (cap for exponential backoff)

const defaultRetryState: RetryState = {
attempt: 0,
retryStartTime: Date.now(),
Expand Down Expand Up @@ -45,7 +43,9 @@ export const RetryBarrier: React.FC<RetryBarrierProps> = ({ workspaceId, classNa
// Compute effective autoRetry state: user preference AND error is retryable
// This ensures UI shows "Retry" button (not "Retrying...") for non-retryable errors
const effectiveAutoRetry = useMemo(() => {
if (!autoRetry || !workspaceState) return false;
if (!autoRetry || !workspaceState) {
return false;
}

// Check if current state is eligible for auto-retry
const messagesEligible = isEligibleForAutoRetry(
Expand All @@ -54,6 +54,7 @@ export const RetryBarrier: React.FC<RetryBarrierProps> = ({ workspaceId, classNa
);

// Also check RetryState for SendMessageErrors (from resumeStream failures)
// Note: isNonRetryableSendError already respects window.__CMUX_FORCE_ALL_RETRYABLE
if (lastError && isNonRetryableSendError(lastError)) {
return false; // Non-retryable SendMessageError
}
Expand All @@ -64,37 +65,31 @@ export const RetryBarrier: React.FC<RetryBarrierProps> = ({ workspaceId, classNa
// Local state for UI
const [countdown, setCountdown] = useState(0);

// Calculate delay with exponential backoff (same as useResumeManager)
const getDelay = useCallback((attemptNum: number) => {
const exponentialDelay = INITIAL_DELAY * Math.pow(2, attemptNum);
return Math.min(exponentialDelay, MAX_DELAY);
}, []);

// Update countdown display (pure display logic, no side effects)
// useResumeManager handles the actual retry logic
useEffect(() => {
if (!autoRetry) return;

const interval = setInterval(() => {
const delay = getDelay(attempt);
const delay = calculateBackoffDelay(attempt);
const nextRetryTime = retryStartTime + delay;
const timeUntilRetry = Math.max(0, nextRetryTime - Date.now());

setCountdown(Math.ceil(timeUntilRetry / 1000));
}, 100);

return () => clearInterval(interval);
}, [autoRetry, attempt, retryStartTime, getDelay]);
}, [autoRetry, attempt, retryStartTime]);

// Manual retry handler (user-initiated, immediate)
// Emits event to useResumeManager instead of calling resumeStream directly
// This keeps all retry logic centralized in one place
const handleManualRetry = () => {
setAutoRetry(true); // Re-enable auto-retry for next failure

// Clear retry state to make workspace immediately eligible for resume
// Use updatePersistedState to ensure listener-enabled hooks receive the update
updatePersistedState(getRetryStateKey(workspaceId), null);
// Create manual retry state: immediate retry BUT preserves attempt counter
// This prevents infinite retry loops without backoff if the retry fails
updatePersistedState(getRetryStateKey(workspaceId), createManualRetryState(attempt));

// Emit event to useResumeManager - it will handle the actual resume
// Pass isManual flag to bypass eligibility checks (user explicitly wants to retry)
Expand Down
43 changes: 23 additions & 20 deletions src/hooks/useResumeManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@ import { readPersistedState, updatePersistedState } from "./usePersistedState";
import { isEligibleForAutoRetry, isNonRetryableSendError } from "@/utils/messages/retryEligibility";
import { applyCompactionOverrides } from "@/utils/messages/compactionOptions";
import type { SendMessageError } from "@/types/errors";
import {
createFailedRetryState,
calculateBackoffDelay,
INITIAL_DELAY,
} from "@/utils/messages/retryState";

/**
 * Retry bookkeeping persisted per workspace (under getRetryStateKey) and used
 * to decide when the next automatic resume may be attempted.
 */
export interface RetryState {
// Failure counter; passed to calculateBackoffDelay to size the next wait.
attempt: number;
// Timestamp in epoch milliseconds (Date.now()) that the backoff is measured from:
// a resume is held back while Date.now() - retryStartTime is below the delay.
retryStartTime: number;
// Error from the most recent failed resume, stored so RetryBarrier can display it.
lastError?: SendMessageError;
}

const INITIAL_DELAY = 1000; // 1 second
const MAX_DELAY = 60000; // 60 seconds

/**
* Centralized auto-resume manager for interrupted streams
*
Expand Down Expand Up @@ -122,7 +124,7 @@ export function useResumeManager() {

// 5. Check exponential backoff timer
const { attempt, retryStartTime } = retryState;
const delay = Math.min(INITIAL_DELAY * Math.pow(2, attempt), MAX_DELAY);
const delay = calculateBackoffDelay(attempt);
const timeSinceLastRetry = Date.now() - retryStartTime;

if (timeSinceLastRetry < delay) return false; // Not time yet
Expand Down Expand Up @@ -151,6 +153,9 @@ export function useResumeManager() {
});

const { attempt } = retryState;
console.debug(
`[retry] ${workspaceId} attemptResume: current attempt=${attempt}, isManual=${isManual}`
);

try {
// Start with workspace defaults
Expand All @@ -171,27 +176,25 @@ export function useResumeManager() {

if (!result.success) {
// Store error in retry state so RetryBarrier can display it
const newState: RetryState = {
attempt: attempt + 1,
retryStartTime: Date.now(),
lastError: result.error,
};
const newState = createFailedRetryState(attempt, result.error);
console.debug(
`[retry] ${workspaceId} resumeStream failed: attempt ${attempt} → ${newState.attempt}`
);
updatePersistedState(getRetryStateKey(workspaceId), newState);
} else {
// Success - clear retry state entirely
// If stream fails again, we'll start fresh (immediately eligible)
updatePersistedState(getRetryStateKey(workspaceId), null);
}
// Note: Don't clear retry state on success - stream-end event will handle that
// resumeStream success just means "stream initiated", not "stream completed"
// Clearing here causes backoff reset bug when stream starts then immediately fails
} catch (error) {
// Store error in retry state for display
const newState: RetryState = {
attempt: attempt + 1,
retryStartTime: Date.now(),
lastError: {
type: "unknown",
raw: error instanceof Error ? error.message : "Failed to resume stream",
},
const errorData: SendMessageError = {
type: "unknown",
raw: error instanceof Error ? error.message : "Failed to resume stream",
};
const newState = createFailedRetryState(attempt, errorData);
console.debug(
`[retry] ${workspaceId} resumeStream exception: attempt ${attempt} → ${newState.attempt}`
);
updatePersistedState(getRetryStateKey(workspaceId), newState);
} finally {
// Always clear retrying flag
Expand Down
28 changes: 24 additions & 4 deletions src/stores/WorkspaceStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import type { TokenConsumer } from "@/types/chatStats";
import type { LanguageModelV2Usage } from "@ai-sdk/provider";
import { getCancelledCompactionKey } from "@/constants/storage";
import { isCompactingStream, findCompactionRequestMessage } from "@/utils/compaction/handler";
import { createFreshRetryState } from "@/utils/messages/retryState";

export interface WorkspaceState {
name: string; // User-facing workspace name (e.g., "feature-branch")
Expand Down Expand Up @@ -123,10 +124,8 @@ export class WorkspaceStore {
if (this.onModelUsed) {
this.onModelUsed((data as { model: string }).model);
}
updatePersistedState(getRetryStateKey(workspaceId), {
attempt: 0,
retryStartTime: Date.now(),
});
// Don't reset retry state here - stream might still fail after starting
// Retry state will be reset on stream-end (successful completion)
this.states.bump(workspaceId);
},
"stream-delta": (workspaceId, aggregator, data) => {
Expand All @@ -141,6 +140,9 @@ export class WorkspaceStore {
return;
}

// Reset retry state on successful stream completion
updatePersistedState(getRetryStateKey(workspaceId), createFreshRetryState());

this.states.bump(workspaceId);
this.checkAndBumpRecencyIfChanged();
this.finalizeUsageStats(workspaceId, (data as { metadata?: never }).metadata);
Expand Down Expand Up @@ -920,6 +922,24 @@ export class WorkspaceStore {
// Handle non-buffered special events first
if (isStreamError(data)) {
aggregator.handleStreamError(data);

// Increment retry attempt counter when stream fails
// This handles auth errors that happen AFTER stream-start
updatePersistedState(
getRetryStateKey(workspaceId),
(prev) => {
const newAttempt = prev.attempt + 1;
console.debug(
`[retry] ${workspaceId} stream-error: incrementing attempt ${prev.attempt} → ${newAttempt}`
);
return {
attempt: newAttempt,
retryStartTime: Date.now(),
};
},
{ attempt: 0, retryStartTime: Date.now() }
);

this.states.bump(workspaceId);
this.dispatchResumeCheck(workspaceId);
return;
Expand Down
33 changes: 33 additions & 0 deletions src/utils/messages/retryEligibility.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,30 @@
import type { DisplayedMessage } from "@/types/message";
import type { StreamErrorType, SendMessageError } from "@/types/errors";

/**
* Debug flag to force all errors to be retryable
* Set in browser console: window.__CMUX_FORCE_ALL_RETRYABLE = true
*
* Useful for testing retry/backoff logic without needing to simulate
* specific network conditions or rate limits.
*
* Note: If you set this flag after an error occurs, you may need to
* trigger a manual retry first (click "Retry" button) to clear the
* stored non-retryable error state.
*/
declare global {
interface Window {
// Opt-in debug switch. Optional, so it is normally undefined; the helper that
// reads it compares with `=== true`, so only the boolean `true` enables it.
__CMUX_FORCE_ALL_RETRYABLE?: boolean;
}
}

/**
 * Reports whether the `window.__CMUX_FORCE_ALL_RETRYABLE` debug override is switched on.
 *
 * @returns `true` only when a `window` global exists and the flag is strictly `true`;
 *          `false` otherwise, including environments where `window` is not defined.
 */
function isForceAllRetryableEnabled(): boolean {
  // Guard first: reading `window` directly would throw where it is not defined.
  if (typeof window === "undefined") {
    return false;
  }
  const flag = window.__CMUX_FORCE_ALL_RETRYABLE;
  return flag === true;
}

/**
* Error types that should NOT be auto-retried because they require user action
* These errors won't resolve on their own - the user must fix the underlying issue
Expand All @@ -17,6 +41,11 @@ const NON_RETRYABLE_STREAM_ERRORS: StreamErrorType[] = [
* Check if a SendMessageError (from resumeStream failures) is non-retryable
*/
export function isNonRetryableSendError(error: SendMessageError): boolean {
// Debug flag: force all errors to be retryable
if (isForceAllRetryableEnabled()) {
return false;
}

switch (error.type) {
case "api_key_not_found": // Missing API key - user must configure
case "provider_not_supported": // Unsupported provider - user must switch
Expand Down Expand Up @@ -91,6 +120,10 @@ export function isEligibleForAutoRetry(
// (but manual retry is still available via hasInterruptedStream)
const lastMessage = messages[messages.length - 1];
if (lastMessage.type === "stream-error") {
// Debug flag: force all errors to be retryable
if (isForceAllRetryableEnabled()) {
return true;
}
return !NON_RETRYABLE_STREAM_ERRORS.includes(lastMessage.errorType);
}

Expand Down
99 changes: 99 additions & 0 deletions src/utils/messages/retryState.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { describe, it, expect } from "bun:test";
import {
createFreshRetryState,
createManualRetryState,
createFailedRetryState,
calculateBackoffDelay,
INITIAL_DELAY,
} from "./retryState";

// Unit tests for the retry-state helpers: backoff delay calculation and the
// three state constructors (fresh, manual retry, failed attempt).
describe("retryState utilities", () => {
  // Builds the generic error payload shared by the failure scenarios below.
  const makeError = () => ({ type: "unknown" as const, raw: "Error" });

  describe("calculateBackoffDelay", () => {
    it("returns exponential backoff: 1s → 2s → 4s → 8s...", () => {
      expect(calculateBackoffDelay(0)).toBe(1000);
      expect(calculateBackoffDelay(1)).toBe(2000);
      expect(calculateBackoffDelay(2)).toBe(4000);
      expect(calculateBackoffDelay(3)).toBe(8000);
    });

    it("caps at 60 seconds for large attempts", () => {
      // 2^6 * 1000 = 64000 already exceeds the cap
      expect(calculateBackoffDelay(6)).toBe(60000);
      expect(calculateBackoffDelay(10)).toBe(60000);
    });
  });

  describe("createFreshRetryState", () => {
    it("creates a state with attempt 0 and no error", () => {
      const state = createFreshRetryState();
      expect(state.attempt).toBe(0);
      expect(state.lastError).toBeUndefined();
      expect(state.retryStartTime).toBeLessThanOrEqual(Date.now());
    });
  });

  describe("createManualRetryState", () => {
    it("preserves attempt counter (critical for backoff)", () => {
      const currentAttempt = 3;
      const state = createManualRetryState(currentAttempt);

      // CRITICAL: Manual retry must preserve attempt counter
      // This ensures exponential backoff continues if the retry fails
      expect(state.attempt).toBe(currentAttempt);
    });

    it("makes retry immediately eligible by backdating retryStartTime", () => {
      const state = createManualRetryState(0);
      // Sampled after the state is created, so a start time backdated by at
      // least INITIAL_DELAY at creation can never exceed this bound.
      const expectedTime = Date.now() - INITIAL_DELAY;
      expect(state.retryStartTime).toBeLessThanOrEqual(expectedTime);
    });

    it("clears any previous error", () => {
      const state = createManualRetryState(2);
      expect(state.lastError).toBeUndefined();
    });

    it("prevents no-backoff bug: preserves attempt counter for continued backoff", () => {
      // Bug scenario: After 3 failed attempts, manual retry should preserve counter
      // so next failure waits 2^3=8s, not reset to 2^0=1s
      const state = createManualRetryState(3);
      expect(state.attempt).toBe(3); // NOT reset to 0
    });
  });

  describe("createFailedRetryState", () => {
    it("increments attempt counter and stores error", () => {
      const error = { type: "unknown" as const, raw: "Test error" };
      const state = createFailedRetryState(2, error);

      expect(state.attempt).toBe(3);
      expect(state.lastError).toEqual(error);
      expect(state.retryStartTime).toBeLessThanOrEqual(Date.now());
    });
  });

  describe("backoff progression scenario", () => {
    it("maintains exponential backoff through manual retries", () => {
      // 3 auto-retry failures → manual retry → preserves attempt counter
      let state = createFailedRetryState(0, makeError());
      state = createFailedRetryState(state.attempt, makeError());
      state = createFailedRetryState(state.attempt, makeError());
      expect(state.attempt).toBe(3);

      state = createManualRetryState(state.attempt);
      expect(state.attempt).toBe(3); // NOT reset to 0

      state = createFailedRetryState(state.attempt, makeError());
      expect(state.attempt).toBe(4); // Continues progression
    });

    // Retry state is reset when a stream completes (stream-end), not when it
    // starts, so the title describes completion rather than stream start.
    it("resets backoff on successful stream completion", () => {
      let state = createFailedRetryState(0, makeError());
      state = createFailedRetryState(state.attempt, makeError());
      expect(state.attempt).toBe(2);

      state = createFreshRetryState();
      expect(state.attempt).toBe(0);
      expect(state.lastError).toBeUndefined();
    });
  });
});
Loading