Skip to content

Commit fa38ec5

Browse files
committed
feat: add key pool rotation to all 5 ElevenLabs providers
1 parent 5fccac4 commit fa38ec5

5 files changed

Lines changed: 127 additions & 65 deletions

File tree

src/media/audio/providers/ElevenLabsSFXProvider.ts

Lines changed: 28 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@
2121

2222
import type { IAudioGenerator } from '../IAudioGenerator.js';
2323
import type { MusicGenerateRequest, SFXGenerateRequest, AudioResult } from '../types.js';
24+
import { ApiKeyPool } from '../../../core/providers/ApiKeyPool.js';
25+
import { isQuotaError } from '../../../core/providers/quotaErrors.js';
2426

2527
// ---------------------------------------------------------------------------
2628
// Configuration
@@ -85,6 +87,9 @@ export class ElevenLabsSFXProvider implements IAudioGenerator {
8587
/** Internal resolved configuration. */
8688
private _config!: Required<Pick<ElevenLabsSFXProviderConfig, 'apiKey' | 'baseURL'>>;
8789

90+
/** API key pool for round-robin rotation and quota failover. */
91+
private keyPool!: ApiKeyPool;
92+
8893
// -------------------------------------------------------------------------
8994
// Lifecycle
9095
// -------------------------------------------------------------------------
@@ -108,6 +113,7 @@ export class ElevenLabsSFXProvider implements IAudioGenerator {
108113
? config.baseURL.trim()
109114
: 'https://api.elevenlabs.io/v1',
110115
};
116+
this.keyPool = new ApiKeyPool(apiKey);
111117

112118
this.isInitialized = true;
113119
}
@@ -150,14 +156,28 @@ export class ElevenLabsSFXProvider implements IAudioGenerator {
150156

151157
if (request.durationSec !== undefined) body.duration_seconds = request.durationSec;
152158

153-
const response = await fetch(url, {
154-
method: 'POST',
155-
headers: {
156-
'xi-api-key': this._config.apiKey,
157-
'Content-Type': 'application/json',
158-
},
159-
body: JSON.stringify(body),
160-
});
159+
const doFetch = (key: string) =>
160+
fetch(url, {
161+
method: 'POST',
162+
headers: {
163+
'xi-api-key': key,
164+
'Content-Type': 'application/json',
165+
},
166+
body: JSON.stringify(body),
167+
});
168+
169+
const key = this.keyPool.next();
170+
let response = await doFetch(key);
171+
172+
if (!response.ok && this.keyPool.size > 1) {
173+
const errBody = await response.text().catch(() => '');
174+
if (isQuotaError(response.status, errBody)) {
175+
this.keyPool.markExhausted(key);
176+
response = await doFetch(this.keyPool.next());
177+
} else {
178+
throw new Error(`ElevenLabs SFX generation failed (${response.status}): ${errBody}`);
179+
}
180+
}
161181

162182
if (!response.ok) {
163183
const errorText = await response.text();

src/speech/providers/ElevenLabsTextToSpeechProvider.ts

Lines changed: 47 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@ import type {
44
SpeechVoice,
55
TextToSpeechProvider,
66
} from '../types.js';
7+
import { ApiKeyPool } from '../../core/providers/ApiKeyPool.js';
8+
import { isQuotaError } from '../../core/providers/quotaErrors.js';
79

810
/**
911
* Configuration for the {@link ElevenLabsTextToSpeechProvider}.
@@ -108,6 +110,9 @@ export class ElevenLabsTextToSpeechProvider implements TextToSpeechProvider {
108110
/** Fetch implementation — injected for testability, defaults to global fetch. */
109111
private readonly fetchImpl: typeof fetch;
110112

113+
/** API key pool for round-robin rotation and quota failover. */
114+
private readonly keyPool: ApiKeyPool;
115+
111116
/**
112117
* Creates a new ElevenLabsTextToSpeechProvider.
113118
*
@@ -124,6 +129,7 @@ export class ElevenLabsTextToSpeechProvider implements TextToSpeechProvider {
124129
*/
125130
constructor(private readonly config: ElevenLabsTextToSpeechProviderConfig) {
126131
this.fetchImpl = config.fetchImpl ?? fetch;
132+
this.keyPool = new ApiKeyPool(config.apiKey);
127133
}
128134

129135
/**
@@ -180,45 +186,53 @@ export class ElevenLabsTextToSpeechProvider implements TextToSpeechProvider {
180186

181187
const model = options.model ?? this.config.model ?? 'eleven_multilingual_v2';
182188

183-
const response = await this.fetchImpl(
184-
`${this.config.baseUrl ?? 'https://api.elevenlabs.io/v1'}/text-to-speech/${voiceId}`,
185-
{
189+
const baseUrl = this.config.baseUrl ?? 'https://api.elevenlabs.io/v1';
190+
const requestBody = JSON.stringify({
191+
text,
192+
model_id: model,
193+
voice_settings: {
194+
stability:
195+
typeof options.providerSpecificOptions?.stability === 'number'
196+
? options.providerSpecificOptions.stability
197+
: 0.5,
198+
similarity_boost:
199+
typeof options.providerSpecificOptions?.similarityBoost === 'number'
200+
? options.providerSpecificOptions.similarityBoost
201+
: 0.75,
202+
style:
203+
typeof options.providerSpecificOptions?.style === 'number'
204+
? options.providerSpecificOptions.style
205+
: undefined,
206+
use_speaker_boost:
207+
typeof options.providerSpecificOptions?.useSpeakerBoost === 'boolean'
208+
? options.providerSpecificOptions.useSpeakerBoost
209+
: true,
210+
},
211+
});
212+
213+
const doFetch = (key: string) =>
214+
this.fetchImpl(`${baseUrl}/text-to-speech/${voiceId}`, {
186215
method: 'POST',
187216
headers: {
188-
// ElevenLabs uses its own header format instead of standard Authorization
189-
'xi-api-key': this.config.apiKey,
217+
'xi-api-key': key,
190218
'Content-Type': 'application/json',
191-
// Request MP3 format in the response
192219
Accept: 'audio/mpeg',
193220
},
194-
body: JSON.stringify({
195-
text,
196-
model_id: model,
197-
voice_settings: {
198-
// Extract provider-specific settings with sensible defaults.
199-
// These defaults produce natural-sounding output for most voices.
200-
stability:
201-
typeof options.providerSpecificOptions?.stability === 'number'
202-
? options.providerSpecificOptions.stability
203-
: 0.5,
204-
similarity_boost:
205-
typeof options.providerSpecificOptions?.similarityBoost === 'number'
206-
? options.providerSpecificOptions.similarityBoost
207-
: 0.75,
208-
// Style is only meaningful for v2+ models; omit if not specified
209-
style:
210-
typeof options.providerSpecificOptions?.style === 'number'
211-
? options.providerSpecificOptions.style
212-
: undefined,
213-
// Speaker boost enhances vocal clarity and similarity
214-
use_speaker_boost:
215-
typeof options.providerSpecificOptions?.useSpeakerBoost === 'boolean'
216-
? options.providerSpecificOptions.useSpeakerBoost
217-
: true,
218-
},
219-
}),
221+
body: requestBody,
222+
});
223+
224+
const key = this.keyPool.next();
225+
let response = await doFetch(key);
226+
227+
if (!response.ok && this.keyPool.size > 1) {
228+
const errBody = await response.text().catch(() => '');
229+
if (isQuotaError(response.status, errBody)) {
230+
this.keyPool.markExhausted(key);
231+
response = await doFetch(this.keyPool.next());
232+
} else {
233+
throw new Error(`ElevenLabs synthesis failed (${response.status}): ${errBody}`);
220234
}
221-
);
235+
}
222236

223237
if (!response.ok) {
224238
const message = await response.text();

src/voice-pipeline/providers/ElevenLabsBatchTTS.ts

Lines changed: 36 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
*/
77

88
import type { IBatchTTS, BatchTTSConfig, BatchTTSResult } from '../types.js';
9+
import { ApiKeyPool } from '../../core/providers/ApiKeyPool.js';
10+
import { isQuotaError } from '../../core/providers/quotaErrors.js';
911

1012
/** Configuration for the ElevenLabs batch TTS provider. */
1113
export interface ElevenLabsBatchTTSConfig {
@@ -31,8 +33,8 @@ const BYTES_PER_SEC_MP3 = 16_000;
3133
export class ElevenLabsBatchTTS implements IBatchTTS {
3234
readonly providerId = 'elevenlabs-batch';
3335

34-
/** ElevenLabs API key used for authentication. */
35-
private readonly apiKey: string;
36+
/** API key pool for round-robin rotation and quota failover. */
37+
private readonly keyPool: ApiKeyPool;
3638

3739
/** Default voice ID when none is provided in the synthesis config. */
3840
private readonly defaultVoiceId: string;
@@ -44,7 +46,7 @@ export class ElevenLabsBatchTTS implements IBatchTTS {
4446
private readonly baseUrl: string;
4547

4648
constructor(config: ElevenLabsBatchTTSConfig) {
47-
this.apiKey = config.apiKey;
49+
this.keyPool = new ApiKeyPool(config.apiKey);
4850
this.defaultVoiceId = config.voiceId ?? 'EXAVITQu4vr4xnSDxMaL';
4951
this.model = config.model ?? 'eleven_multilingual_v2';
5052
this.baseUrl = config.baseUrl ?? 'https://api.elevenlabs.io/v1';
@@ -62,24 +64,38 @@ export class ElevenLabsBatchTTS implements IBatchTTS {
6264
const voiceId = config?.voice ?? this.defaultVoiceId;
6365
const opts = config?.providerOptions ?? {};
6466

65-
const res = await fetch(`${this.baseUrl}/text-to-speech/${voiceId}`, {
66-
method: 'POST',
67-
headers: {
68-
'xi-api-key': this.apiKey,
69-
'Content-Type': 'application/json',
70-
Accept: 'audio/mpeg',
71-
},
72-
body: JSON.stringify({
73-
text,
74-
model_id: config?.model ?? this.model,
75-
voice_settings: {
76-
stability: (opts.stability as number) ?? 0.5,
77-
similarity_boost: (opts.similarityBoost as number) ?? 0.75,
78-
style: (opts.style as number) ?? 0.0,
79-
use_speaker_boost: (opts.useSpeakerBoost as boolean) ?? true,
67+
const doFetch = (key: string) =>
68+
fetch(`${this.baseUrl}/text-to-speech/${voiceId}`, {
69+
method: 'POST',
70+
headers: {
71+
'xi-api-key': key,
72+
'Content-Type': 'application/json',
73+
Accept: 'audio/mpeg',
8074
},
81-
}),
82-
});
75+
body: JSON.stringify({
76+
text,
77+
model_id: config?.model ?? this.model,
78+
voice_settings: {
79+
stability: (opts.stability as number) ?? 0.5,
80+
similarity_boost: (opts.similarityBoost as number) ?? 0.75,
81+
style: (opts.style as number) ?? 0.0,
82+
use_speaker_boost: (opts.useSpeakerBoost as boolean) ?? true,
83+
},
84+
}),
85+
});
86+
87+
const key = this.keyPool.next();
88+
let res = await doFetch(key);
89+
90+
if (!res.ok && this.keyPool.size > 1) {
91+
const body = await res.text().catch(() => '');
92+
if (isQuotaError(res.status, body)) {
93+
this.keyPool.markExhausted(key);
94+
res = await doFetch(this.keyPool.next());
95+
} else {
96+
throw new Error(`ElevenLabs TTS failed: ${res.status} ${body.slice(0, 200)}`);
97+
}
98+
}
8399

84100
if (!res.ok) {
85101
const detail = await res.text().catch(() => '');

src/voice-pipeline/providers/ElevenLabsStreamingSTT.ts

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ import type {
3333
TranscriptEvent,
3434
TranscriptWord,
3535
} from '../types.js';
36+
import { ApiKeyPool } from '../../core/providers/ApiKeyPool.js';
3637

3738
// ---------------------------------------------------------------------------
3839
// Configuration
@@ -335,14 +336,19 @@ class ElevenLabsChunkedSTTSession extends EventEmitter implements StreamingSTTSe
335336
export class ElevenLabsStreamingSTT implements IStreamingSTT {
336337
readonly providerId = 'elevenlabs-streaming-stt';
337338
readonly isStreaming = true;
339+
private readonly keyPool: ApiKeyPool;
338340

339-
constructor(private readonly config: ElevenLabsStreamingSTTConfig) {}
341+
constructor(private readonly config: ElevenLabsStreamingSTTConfig) {
342+
this.keyPool = new ApiKeyPool(config.apiKey);
343+
}
340344

341345
/**
342346
* Create a new STT session. Uses chunked REST calls to ElevenLabs'
343347
* batch STT endpoint for near-realtime transcription.
348+
* Each session gets a fresh key from the round-robin pool.
344349
*/
345350
async startSession(config?: StreamingSTTConfig): Promise<StreamingSTTSession> {
346-
return new ElevenLabsChunkedSTTSession(this.config, config ?? {});
351+
const resolvedConfig = { ...this.config, apiKey: this.keyPool.next() };
352+
return new ElevenLabsChunkedSTTSession(resolvedConfig, config ?? {});
347353
}
348354
}

src/voice-pipeline/providers/ElevenLabsStreamingTTS.ts

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ import type {
3030
StreamingTTSConfig,
3131
EncodedAudioChunk,
3232
} from '../types.js';
33+
import { ApiKeyPool } from '../../core/providers/ApiKeyPool.js';
3334

3435
// ---------------------------------------------------------------------------
3536
// Configuration
@@ -315,15 +316,20 @@ class ElevenLabsStreamingTTSSession extends EventEmitter implements StreamingTTS
315316
*/
316317
export class ElevenLabsStreamingTTS implements IStreamingTTS {
317318
readonly providerId = 'elevenlabs-streaming';
319+
private readonly keyPool: ApiKeyPool;
318320

319-
constructor(private readonly config: ElevenLabsStreamingTTSConfig) {}
321+
constructor(private readonly config: ElevenLabsStreamingTTSConfig) {
322+
this.keyPool = new ApiKeyPool(config.apiKey);
323+
}
320324

321325
/**
322326
* Create a new streaming TTS session connected to ElevenLabs.
323327
* The session opens a WebSocket and is ready to receive text tokens.
328+
* Each session gets a fresh key from the round-robin pool.
324329
*/
325330
async startSession(config?: StreamingTTSConfig): Promise<StreamingTTSSession> {
326-
const session = new ElevenLabsStreamingTTSSession(this.config, config ?? {});
331+
const resolvedConfig = { ...this.config, apiKey: this.keyPool.next() };
332+
const session = new ElevenLabsStreamingTTSSession(resolvedConfig, config ?? {});
327333
await session.connect();
328334
return session;
329335
}

0 commit comments

Comments
 (0)