
Commit cecb36c

feat(agent): expose per-call maxTokens on agent() config
BaseAgentConfig now carries an optional maxTokens field. When set, it's forwarded to every generateText / streamText call the agent makes: generate(), session.send(), and stream(). Unset preserves current behavior (providers fall back to their defaults, typically 4-8k tokens).

The driver: agency-style sessions (e.g. paracosm's commander + department agents) had no way to cap tail spend on a misbehaving model. Per-call maxTokens was accessible on generateText / generateObject directly, but not on agent sessions, so high-volume session-backed call sites defaulted to provider caps on every invocation.

Two tests pin the behavior: maxTokens flows through agent.generate() and session.send() when configured, and stays undefined on the generateText call when omitted.
1 parent 7885a91 commit cecb36c

3 files changed: 41 additions & 0 deletions
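
For a quick sense of the surface this adds, here is a minimal usage sketch. The field names and values mirror the test file below; the import path and prompts are illustrative assumptions, not part of the commit.

import { agent } from './src/api/agent';

// Cap every completion this agent produces at 1500 tokens, across
// generate(), stream(), and session.send().
const assistant = agent({
  model: 'openai:gpt-4.1-mini',
  instructions: 'be brief',
  maxTokens: 1500,
});

await assistant.generate('Hello.');            // generateText receives maxTokens: 1500
await assistant.session('s1').send('Hello.');  // so does the session-backed call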


src/api/agent.ts

Lines changed: 4 additions & 0 deletions
@@ -380,6 +380,10 @@ export function agent(opts: AgentOptions): Agent {
     system: opts.systemBlocks ?? buildSystemPrompt(opts),
     tools: opts.tools,
     maxSteps: opts.maxSteps ?? 5,
+    // Per-call completion-token cap applied to every generate /
+    // session.send / stream invocation this agent makes. Unset means
+    // the underlying generateText falls back to the provider default.
+    maxTokens: opts.maxTokens,
     chainOfThought: opts.chainOfThought ?? true,
     apiKey: opts.apiKey,
     baseUrl: opts.baseUrl,
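
The hunk above only touches the config object; how that value reaches the model call is outside this diff. A rough sketch of the forwarding, assuming an AI SDK-style generateText that accepts maxTokens — the callModel helper and config shape here are hypothetical, not this repo's runtime code:

import { generateText } from 'ai';

// Hypothetical runtime helper: the agent's stored config is threaded into
// each model call, so an unset maxTokens stays undefined and the
// provider's own default cap applies.
async function callModel(
  config: { model: Parameters<typeof generateText>[0]['model']; maxTokens?: number },
  prompt: string,
) {
  return generateText({
    model: config.model,
    prompt,
    maxTokens: config.maxTokens, // undefined when the agent config omits it
  });
}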

src/api/runtime/__tests__/agent.test.ts

Lines changed: 25 additions & 0 deletions
@@ -202,4 +202,29 @@ describe('agent', () => {
       })
     );
   });
+
+  it('forwards maxTokens from agent config to every generateText call (generate / session.send)', async () => {
+    const assistant = agent({
+      model: 'openai:gpt-4.1-mini',
+      instructions: 'be brief',
+      maxTokens: 1500,
+    });
+
+    await assistant.generate('Hello.');
+    expect(hoisted.generateText).toHaveBeenLastCalledWith(
+      expect.objectContaining({ maxTokens: 1500 }),
+    );
+
+    await assistant.session('s1').send('Hello again.');
+    expect(hoisted.generateText).toHaveBeenLastCalledWith(
+      expect.objectContaining({ maxTokens: 1500 }),
+    );
+  });
+
+  it('omits maxTokens from the generateText call when agent config does not set it', async () => {
+    const assistant = agent({ model: 'openai:gpt-4.1-mini', instructions: 'be brief' });
+    await assistant.generate('Hello.');
+    const callArgs = hoisted.generateText.mock.calls.at(-1)?.[0];
+    expect(callArgs?.maxTokens).toBeUndefined();
+  });
 });
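
The tests lean on a hoisted.generateText spy that lives outside this hunk. A plausible Vitest setup for it would look like the following; the mocked module path ('ai') and the resolved value are assumptions:

import { vi } from 'vitest';

// vi.hoisted runs before vi.mock factories are evaluated, so the spy
// can be shared between the mock below and the assertions above.
const hoisted = vi.hoisted(() => ({
  generateText: vi.fn().mockResolvedValue({ text: 'ok' }),
}));

vi.mock('ai', () => ({
  generateText: hoisted.generateText,
  streamText: vi.fn(),
}));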

src/api/types.ts

Lines changed: 12 additions & 0 deletions
@@ -1191,6 +1191,18 @@ export interface BaseAgentConfig {
   tools?: AdaptableToolInput;
   /** Maximum number of agentic steps (LLM calls) per invocation. Defaults to `5`. */
   maxSteps?: number;
+  /**
+   * Upper bound on completion tokens for each LLM call the agent makes.
+   * Forwarded to the underlying `generateText` / `streamText` call on
+   * every `generate()`, `stream()`, and `session.send()` invocation.
+   *
+   * Caps tail spend when a model misbehaves and yaps past the intended
+   * output size — without it, calls fall back to the provider default
+   * (OpenAI 4096, Anthropic 4096-8192). Set to ~2× the agent's typical
+   * response size so normal calls finish naturally and only runaway
+   * generations hit the cap. Omit to use the provider default.
+   */
+  maxTokens?: number;
   /**
    * Memory configuration.
    * - `true` — enable in-memory conversation history with default settings.
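
To make the doc comment's ~2x sizing guidance concrete: an agent whose replies usually land around 600 completion tokens would get a 1200-token cap. The numbers and config below are illustrative only.

// Typical replies run ~600 tokens, so cap at roughly 2x that.
const support = agent({
  model: 'openai:gpt-4.1-mini',
  instructions: 'answer in one short paragraph',
  maxTokens: 1200, // normal replies finish naturally; only runaways get cut
});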
