Skip to content

Commit fb602a1

Browse files
committed
fix(openai): use max_completion_tokens for gpt-5 + o-series models
Production paracosm broke after the per-call maxTokens caps shipped: OpenAI's gpt-5.x and o-series (reasoning) models reject max_tokens with HTTP 400 "Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead." OpenAIProvider.buildPayload always sent max_tokens. It now sniffs the model id via modelRequiresMaxCompletionTokens() and routes to either max_tokens (legacy: gpt-4o, gpt-4-turbo, gpt-4.1, gpt-3.5) or max_completion_tokens (new: gpt-5.x, o1, o3, o4). Same value, different param name — the two are semantically equivalent within each generation. The check errs conservative: unknown model ids (Anthropic, Llama, etc. — not that they reach this code path, but defensively) fall through to max_tokens, so older deployments don't silently break when the param flag changes. Six vitest cases pin the routing decision across known model families (gpt-5.x, o1/o3/o4, gpt-4o, gpt-4.1, gpt-3.5), the case-insensitive match logic, and the conservative fallback for unknown model ids. Streaming and non-streaming completions both flow through the shared buildPayload path, so the fix covers both.
1 parent 9f25c28 commit fb602a1

2 files changed

Lines changed: 82 additions & 1 deletion

File tree

src/core/llm/providers/implementations/OpenAIProvider.ts

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,29 @@ type _OpenAIAPIErrorResponse = {
166166
* Provides an interface to OpenAI's suite of models (GPT, Embeddings).
167167
* It handles API requests, streaming, error management, and model information.
168168
*/
169+
/**
 * Whether the given model id belongs to a family that requires
 * `max_completion_tokens` instead of the legacy `max_tokens` parameter.
 *
 * OpenAI's reasoning models (o1, o3, o4) and the GPT-5 family reject
 * `max_tokens` outright with HTTP 400 "Unsupported parameter:
 * 'max_tokens' is not supported with this model. Use
 * 'max_completion_tokens' instead." Legacy chat completions
 * (gpt-4o, gpt-4-turbo, gpt-4.1, gpt-3.5, etc.) still accept the
 * old field.
 *
 * Fine-tuned model ids carry an `ft:` prefix in front of the base model
 * name (e.g. `'ft:o4-mini-2025-04-16:acme::abc123'`). They are classified
 * by their base model, so a fine-tuned reasoning model is routed the same
 * way as the model it was tuned from.
 *
 * Errs on the conservative side — any model id that is not a clear
 * member of one of the new families uses `max_tokens` so older
 * deployments do not silently break when the param-name flag changes.
 *
 * @param modelId Provider-side model identifier (e.g. `'gpt-5.4-mini'`).
 * @returns `true` when the model needs `max_completion_tokens`.
 */
export function modelRequiresMaxCompletionTokens(modelId: string): boolean {
  // Optional fine-tune prefix, then either an o<digit> reasoning model
  // (o1 / o3 / o4 ...) or a GPT-5 family id. Matching is case-insensitive
  // and anchored at the start so ids such as `ollama-x` never match.
  return /^(?:ft:)?(?:o\d|gpt-5)/i.test(modelId);
}
191+
169192
export class OpenAIProvider implements IProvider {
170193
/** @inheritdoc */
171194
public readonly providerId: string = 'openai';
@@ -614,7 +637,18 @@ export class OpenAIProvider implements IProvider {
614637

615638
if (options.temperature !== undefined) payload.temperature = options.temperature;
616639
if (options.topP !== undefined) payload.top_p = options.topP;
617-
if (options.maxTokens !== undefined) payload.max_tokens = options.maxTokens;
640+
if (options.maxTokens !== undefined) {
641+
// OpenAI's reasoning + GPT-5 model families reject `max_tokens`
642+
// and require `max_completion_tokens` instead. Legacy chat
643+
// completions (gpt-4o, gpt-4-turbo, gpt-3.5, etc.) keep
644+
// `max_tokens`. The two fields are otherwise equivalent — same
645+
// semantic meaning, just renamed in the newer API surface.
646+
if (modelRequiresMaxCompletionTokens(modelId)) {
647+
payload.max_completion_tokens = options.maxTokens;
648+
} else {
649+
payload.max_tokens = options.maxTokens;
650+
}
651+
}
618652
if (options.presencePenalty !== undefined) payload.presence_penalty = options.presencePenalty;
619653
if (options.frequencyPenalty !== undefined) payload.frequency_penalty = options.frequencyPenalty;
620654
if (options.stopSequences !== undefined) payload.stop = options.stopSequences;
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/**
2+
* @fileoverview Tests for the model-id sniff that decides whether to
3+
* send `max_tokens` (legacy) or `max_completion_tokens` (newer
4+
* reasoning + GPT-5 families) to the OpenAI API.
5+
*/
6+
import { describe, it, expect } from 'vitest';
7+
import { modelRequiresMaxCompletionTokens } from '../implementations/OpenAIProvider.js';
8+
9+
describe('modelRequiresMaxCompletionTokens', () => {
  /**
   * Checks, in order, that every id in `modelIds` is routed to the
   * `expected` decision. Stops at the first mismatch, exactly like a
   * sequence of individual `expect(...)` statements would.
   */
  const expectAll = (modelIds: readonly string[], expected: boolean): void => {
    for (const modelId of modelIds) {
      expect(modelRequiresMaxCompletionTokens(modelId)).toBe(expected);
    }
  };

  // Families that must receive `max_completion_tokens`.
  it('returns true for the GPT-5 family', () => {
    expectAll(['gpt-5', 'gpt-5.4', 'gpt-5.4-mini', 'gpt-5.4-nano'], true);
  });

  it('returns true for o-series reasoning models', () => {
    expectAll(['o1', 'o1-mini', 'o3', 'o3-mini', 'o4-mini'], true);
  });

  // Families that keep the legacy `max_tokens` field.
  it('returns false for legacy gpt-4 family models that still accept max_tokens', () => {
    expectAll(['gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo', 'gpt-4.1', 'gpt-4.1-mini'], false);
  });

  it('returns false for gpt-3.5 + chat completions models', () => {
    expectAll(['gpt-3.5-turbo'], false);
  });

  // The sniff must not depend on the casing of the model id.
  it('is case-insensitive', () => {
    expectAll(['GPT-5', 'O1-Mini'], true);
  });

  // Anything unrecognised falls back to the legacy parameter.
  it('errs conservative for unknown model ids — uses legacy max_tokens', () => {
    expectAll(['claude-sonnet-4-6', 'llama3:8b', 'mystery-model'], false);
  });
});

0 commit comments

Comments
 (0)