Skip to content

Commit bddcb6a

Browse files
committed
feat(voice): per-provider CircuitBreaker with auth-permanent + cooldown recovery
State machine: healthy -> (N failures in W window) -> tripped -> (cooldown T elapsed) -> healthy. Auth failures bypass the threshold and trip permanently until recordSuccess() or operator intervention — a bad key won't recover on its own. Includes onStateChange listeners for chain integration and a tick() method for externally-driven recovery timers. Task 3/17.
1 parent 41368b1 commit bddcb6a

2 files changed

Lines changed: 231 additions & 0 deletions

File tree

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/**
 * @module voice-pipeline/CircuitBreaker
 *
 * Per-provider state machine: tracks failures within a sliding window, trips
 * when the failure count crosses a threshold, and auto-recovers after a
 * cooldown. Auth failures trip permanently (cooldown = Infinity) because a
 * bad API key won't fix itself without operator intervention.
 */
9+
10+
import type { HealthErrorClass } from './VoicePipelineError.js';
11+
12+
/**
 * The two states a provider's breaker can be in: `'healthy'` (usable) or
 * `'tripped'` (temporarily or permanently taken out of rotation).
 */
export type BreakerState =
  | 'healthy'
  | 'tripped';
13+
14+
/**
 * Construction options for the circuit breaker.
 */
export interface CircuitBreakerOptions {
  /** Number of in-window failures at which a provider is tripped. */
  failureThreshold: number;
  /**
   * Length of the sliding failure window, in milliseconds. A failure is
   * counted while its age is less than or equal to this value.
   */
  windowMs: number;
  /**
   * How long, in milliseconds, a threshold-tripped provider stays tripped
   * before it is allowed to recover.
   */
  cooldownMs: number;
  /** Clock source returning the current time; defaults to `Date.now()`. */
  now?: () => number;
}
20+
21+
/**
 * Payload delivered to state-change listeners whenever a provider's breaker
 * moves from one state to another.
 */
export interface StateChangeEvent {
  /** Identifier of the provider whose state changed. */
  providerId: string;
  /** State the provider was in before the transition. */
  from: BreakerState;
  /** State the provider is in after the transition. */
  to: BreakerState;
  /**
   * Why the transition happened: the failure class that caused a trip, or
   * `'recover'` when the provider returned to healthy.
   */
  reason?: HealthErrorClass | 'recover';
}
27+
28+
/**
 * Internal bookkeeping kept for each provider the breaker has seen.
 */
interface ProviderRecord {
  /** Timestamps (from the breaker's clock) of recent non-auth failures. */
  failures: number[];
  /** Time of the most recent trip, or `null` when not tripped. */
  trippedAt: number | null;
  /**
   * Time at which a tripped provider may recover; `Infinity` for a
   * permanent (auth) trip, `0` when not tripped.
   */
  trippedUntil: number;
  /** Last state computed for this provider. */
  currentState: BreakerState;
}
34+
35+
/**
 * Per-provider circuit breaker.
 *
 * Each provider starts `'healthy'`. Non-auth failures are timestamped and
 * counted inside a sliding window; once the count reaches the configured
 * threshold the provider is `'tripped'` for the cooldown period. An `'auth'`
 * failure trips the provider immediately and permanently: it only becomes
 * healthy again through {@link CircuitBreaker.recordSuccess}.
 *
 * Recovery after a cooldown is evaluated lazily, whenever `state()` (or
 * `isAvailable()` / `tick()`, which call it) is invoked.
 */
export class CircuitBreaker {
  private readonly failureThreshold: number;
  private readonly windowMs: number;
  private readonly cooldownMs: number;
  private readonly nowFn: () => number;
  private readonly records = new Map<string, ProviderRecord>();
  private readonly listeners = new Set<(event: StateChangeEvent) => void>();

  /**
   * @param opts Threshold, window and cooldown settings, plus an optional
   *   clock; when `opts.now` is omitted `Date.now()` is used.
   */
  constructor(opts: CircuitBreakerOptions) {
    this.failureThreshold = opts.failureThreshold;
    this.windowMs = opts.windowMs;
    this.cooldownMs = opts.cooldownMs;
    this.nowFn = opts.now ?? (() => Date.now());
  }

  /**
   * Returns the provider's current state, first applying cooldown recovery
   * if the trip deadline has passed (listeners are notified in that case).
   * Unknown providers are registered and reported as `'healthy'`.
   */
  state(providerId: string): BreakerState {
    const rec = this.getOrCreate(providerId);
    if (rec.currentState === 'tripped' && this.nowFn() >= rec.trippedUntil) {
      // Clear the trip bookkeeping the same way recordSuccess() does so a
      // recovered record never carries a stale deadline.
      rec.trippedAt = null;
      rec.trippedUntil = 0;
      this.transition(providerId, rec, 'healthy', 'recover');
    }
    return rec.currentState;
  }

  /** True when the provider's state (after lazy recovery) is `'healthy'`. */
  isAvailable(providerId: string): boolean {
    return this.state(providerId) === 'healthy';
  }

  /**
   * Records a failed call for the provider.
   *
   * @param providerId Provider the failure belongs to.
   * @param reason Failure class; `'auth'` trips permanently, anything else
   *   is counted against the sliding-window threshold.
   */
  recordFailure(providerId: string, reason: HealthErrorClass): void {
    const rec = this.getOrCreate(providerId);
    const now = this.nowFn();

    // Auth failures are terminal — a bad key won't recover on its own.
    if (reason === 'auth') {
      rec.trippedAt = now;
      rec.trippedUntil = Number.POSITIVE_INFINITY;
      rec.failures = [];
      this.transition(providerId, rec, 'tripped', 'auth');
      return;
    }

    rec.failures.push(now);
    // Keep only failures whose age is within the window (inclusive).
    rec.failures = rec.failures.filter((t) => now - t <= this.windowMs);
    if (rec.failures.length >= this.failureThreshold) {
      const deadline = now + this.cooldownMs;
      rec.trippedAt = now;
      // Never shorten an active trip: a permanent (auth) trip has an
      // Infinity deadline and must not be downgraded to a finite cooldown
      // by later non-auth failures.
      rec.trippedUntil =
        rec.currentState === 'tripped'
          ? Math.max(rec.trippedUntil, deadline)
          : deadline;
      rec.failures = [];
      this.transition(providerId, rec, 'tripped', reason);
    }
  }

  /**
   * Records a successful call: clears the failure window and, if the
   * provider was tripped (including permanently), marks it healthy again.
   */
  recordSuccess(providerId: string): void {
    const rec = this.getOrCreate(providerId);
    rec.failures = [];
    if (rec.currentState === 'tripped') {
      rec.trippedAt = null;
      rec.trippedUntil = 0;
      this.transition(providerId, rec, 'healthy', 'recover');
    }
  }

  /**
   * Force a state-transition pass for all tracked providers. Useful when
   * the caller wants to drive recoveries on a timer.
   *
   * @param _nowHint Accepted for call-site compatibility but not used; the
   *   pass always reads the breaker's own clock.
   */
  tick(_nowHint?: number): void {
    for (const id of this.records.keys()) {
      void this.state(id);
    }
  }

  /**
   * Subscribes to state transitions.
   *
   * @returns A function that removes the listener again.
   */
  onStateChange(fn: (event: StateChangeEvent) => void): () => void {
    this.listeners.add(fn);
    return () => {
      this.listeners.delete(fn);
    };
  }

  /** Looks up the provider's record, creating a healthy one on first use. */
  private getOrCreate(providerId: string): ProviderRecord {
    let rec = this.records.get(providerId);
    if (!rec) {
      rec = {
        failures: [],
        trippedAt: null,
        trippedUntil: 0,
        currentState: 'healthy',
      };
      this.records.set(providerId, rec);
    }
    return rec;
  }

  /**
   * Moves the record to `to` and notifies listeners. A no-op (no event)
   * when the record is already in that state.
   */
  private transition(
    providerId: string,
    rec: ProviderRecord,
    to: BreakerState,
    reason: StateChangeEvent['reason']
  ): void {
    const from = rec.currentState;
    if (from === to) return;
    rec.currentState = to;
    for (const fn of this.listeners) {
      try {
        fn({ providerId, from, to, reason });
      } catch {
        /* one bad listener must not poison the rest */
      }
    }
  }
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import { describe, it, expect, vi } from 'vitest';
2+
import { CircuitBreaker } from '../CircuitBreaker.js';
3+
4+
/**
 * Builds a breaker with the defaults shared by most specs: trip after three
 * failures inside a 60 s window, 60 s cooldown, real wall-clock time.
 */
function mkBreaker() {
  return new CircuitBreaker({
    failureThreshold: 3,
    windowMs: 60_000,
    cooldownMs: 60_000,
    now: () => Date.now(),
  });
}
12+
13+
// Behavioural spec for CircuitBreaker: initial state, threshold trips,
// permanent auth trips, cooldown recovery, success reset, listener
// notification and sliding-window expiry.
describe('CircuitBreaker', () => {
  it('starts healthy', () => {
    const b = mkBreaker();
    expect(b.state('deepgram')).toBe('healthy');
    expect(b.isAvailable('deepgram')).toBe(true);
  });

  it('trips after failureThreshold failures in window', () => {
    const b = mkBreaker();
    b.recordFailure('deepgram', 'network');
    b.recordFailure('deepgram', 'network');
    expect(b.state('deepgram')).toBe('healthy');
    b.recordFailure('deepgram', 'network');
    expect(b.state('deepgram')).toBe('tripped');
    expect(b.isAvailable('deepgram')).toBe(false);
  });

  it('auth failures trip immediately and do not auto-recover', () => {
    // Use an injected clock and actually advance it: tick() ignores its
    // argument, so with the real clock no time would pass and the
    // assertion below would hold vacuously.
    const nowRef = { t: 1_000_000 };
    const b = new CircuitBreaker({
      failureThreshold: 3,
      windowMs: 60_000,
      cooldownMs: 60_000,
      now: () => nowRef.t,
    });
    b.recordFailure('deepgram', 'auth');
    expect(b.state('deepgram')).toBe('tripped');
    nowRef.t += 10 * 60_000;
    b.tick();
    expect(b.state('deepgram')).toBe('tripped');
  });

  it('recovers after cooldown', () => {
    const nowRef = { t: 1_000_000 };
    const b = new CircuitBreaker({
      failureThreshold: 1,
      windowMs: 60_000,
      cooldownMs: 60_000,
      now: () => nowRef.t,
    });
    b.recordFailure('eleven', 'service');
    expect(b.state('eleven')).toBe('tripped');
    nowRef.t += 30_000;
    expect(b.state('eleven')).toBe('tripped');
    nowRef.t += 31_000;
    expect(b.state('eleven')).toBe('healthy');
  });

  it('recordSuccess clears failure counter', () => {
    const b = mkBreaker();
    b.recordFailure('deepgram', 'network');
    b.recordFailure('deepgram', 'network');
    b.recordSuccess('deepgram');
    b.recordFailure('deepgram', 'network');
    b.recordFailure('deepgram', 'network');
    expect(b.state('deepgram')).toBe('healthy');
  });

  it('notifies subscribers on state transitions', () => {
    const b = mkBreaker();
    const onChange = vi.fn();
    b.onStateChange(onChange);
    b.recordFailure('deepgram', 'auth');
    expect(onChange).toHaveBeenCalledWith({
      providerId: 'deepgram',
      from: 'healthy',
      to: 'tripped',
      reason: 'auth',
    });
  });

  it('only counts failures within window', () => {
    const nowRef = { t: 1_000_000 };
    const b = new CircuitBreaker({
      failureThreshold: 3,
      windowMs: 10_000,
      cooldownMs: 60_000,
      now: () => nowRef.t,
    });
    b.recordFailure('eleven', 'network');
    nowRef.t += 11_000;
    b.recordFailure('eleven', 'network');
    b.recordFailure('eleven', 'network');
    // Only 2 failures inside the 10s window — still healthy.
    expect(b.state('eleven')).toBe('healthy');
  });
});

0 commit comments

Comments
 (0)