Skip to content

Commit e166147

Browse files
committed
fix(usage,generateObject): in-memory usage tally + auto-sized output budget
Two related issues that broke published examples on a fresh install: 1) `agent.usage()` and `session.usage()` always returned totalTokens=0 because the implementation only read from the persisted JSONL ledger, which is opt-in (`usageLedger.enabled`). Add a per-session and per-agent in-memory accumulator that's populated from `result.usage` after every generate / send / stream call, and merge it with the persisted ledger at read time so cross-process history continues to roll up correctly. 2) `generateObject()` truncated nested-array schemas (the published recipe example reliably failed) because no `maxTokens` was set, falling back to the provider default of ~256-512 tokens. Add a Zod-direct schema walker that estimates a sensible output budget from field count and array shape, clamped to [512, 8192]. Walks both Zod v3 (`_def.typeName`) and v4 (`_def.type`) internals so it works regardless of which Zod the consumer has installed. Honors `opts.maxTokens` when explicitly set. Verified end-to-end against the agentos.sh published examples in a fresh-install harness with real OpenAI + Anthropic API keys.
1 parent 50871ff commit e166147

2 files changed

Lines changed: 186 additions & 5 deletions

File tree

src/api/agent.ts

Lines changed: 90 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,60 @@ async function loadRecordedAgentOSUsage(
370370
return getRecordedAgentOSUsage(options);
371371
}
372372

373+
/**
 * Produce a fresh usage aggregate with every counter at zero.
 *
 * Seeds the per-agent and per-session in-memory tallies so that
 * `agent.usage()` / `session.usage()` can report real numbers without the
 * persisted ledger being enabled.
 *
 * @param sessionId - Optional session the aggregate is scoped to.
 * @returns A new aggregate object with no persona attached and all counters 0.
 */
function createEmptyUsageAggregate(sessionId?: string): AgentOSUsageAggregate {
  const zeroed: AgentOSUsageAggregate = {
    sessionId,
    personaId: undefined,
    promptTokens: 0,
    completionTokens: 0,
    totalTokens: 0,
    costUSD: 0,
    calls: 0,
  };
  return zeroed;
}
389+
390+
/**
 * Fold a single generation's token usage into a running `AgentOSUsageAggregate`.
 *
 * Mutates `target` in place. Only finite numeric fields are added: a usage
 * payload carrying `NaN` or `Infinity` would otherwise turn the running
 * totals into `NaN` for the rest of the process, because every later `+=`
 * propagates it.
 *
 * @param target - Aggregate to update.
 * @param usage - Usage reported for one call. When `undefined`, nothing is
 *   recorded and the call counter is left untouched.
 */
function accumulateUsage(
  target: AgentOSUsageAggregate,
  usage: { promptTokens?: number; completionTokens?: number; totalTokens?: number; costUSD?: number } | undefined,
): void {
  if (!usage) return;

  // Treat anything that is not a finite number as "not reported".
  const finiteOrUndefined = (value: unknown): number | undefined =>
    typeof value === 'number' && Number.isFinite(value) ? value : undefined;

  const prompt = finiteOrUndefined(usage.promptTokens);
  const completion = finiteOrUndefined(usage.completionTokens);
  const total = finiteOrUndefined(usage.totalTokens);
  const cost = finiteOrUndefined(usage.costUSD);

  if (prompt !== undefined) target.promptTokens += prompt;
  if (completion !== undefined) target.completionTokens += completion;

  // When no usable total was reported, derive it from the parts.
  target.totalTokens += total ?? (prompt ?? 0) + (completion ?? 0);

  if (cost !== undefined) target.costUSD = (target.costUSD ?? 0) + cost;
  target.calls += 1;
}
409+
410+
/**
 * Combine two usage aggregates into a new one; neither input is mutated.
 *
 * Counters are summed. Identity fields (`sessionId`, `personaId`) take `a`'s
 * value and fall back to `b`'s when `a` has none. A missing `costUSD` on
 * either side counts as 0. Used to roll the persisted ledger total together
 * with the current process's in-memory tally.
 *
 * @param a - First aggregate; wins for identity fields.
 * @param b - Second aggregate.
 * @returns A freshly allocated merged aggregate.
 */
function mergeAggregates(a: AgentOSUsageAggregate, b: AgentOSUsageAggregate): AgentOSUsageAggregate {
  const sessionId = a.sessionId ?? b.sessionId;
  const personaId = a.personaId ?? b.personaId;
  const costUSD = (a.costUSD ?? 0) + (b.costUSD ?? 0);

  const merged: AgentOSUsageAggregate = {
    sessionId,
    personaId,
    promptTokens: a.promptTokens + b.promptTokens,
    completionTokens: a.completionTokens + b.completionTokens,
    totalTokens: a.totalTokens + b.totalTokens,
    costUSD,
    calls: a.calls + b.calls,
  };
  return merged;
}
426+
373427
/**
374428
* Convert HEXACO trait values (0-1) into behavioral descriptions the LLM can act on.
375429
*
@@ -472,6 +526,13 @@ function buildSystemPrompt(opts: AgentOptions): string | undefined {
472526
*/
473527
export function agent(opts: AgentOptions): Agent {
474528
const sessions = new Map<string, Message[]>();
529+
// In-memory usage tally per session and per agent. Populated synchronously
530+
// after every generate/send/stream call so `agent.usage()` and
531+
// `session.usage()` work even when the persisted ledger is disabled (the
532+
// common case). The persisted ledger is still merged in at read time so
533+
// cross-process / historical totals continue to roll up correctly.
534+
const sessionUsageTallies = new Map<string, AgentOSUsageAggregate>();
535+
const agentUsageTally: AgentOSUsageAggregate = createEmptyUsageAggregate();
475536
let avatarBindingOverrides: Record<string, unknown> = {};
476537
const useMemory = opts.memory !== false;
477538

@@ -546,7 +607,9 @@ export function agent(opts: AgentOptions): Agent {
546607
} else {
547608
genOpts.messages = [...(genOpts.messages ?? []), { role: 'user', content: prompt }];
548609
}
549-
return generateText(genOpts as GenerateTextOptions);
610+
const result = await generateText(genOpts as GenerateTextOptions);
611+
accumulateUsage(agentUsageTally, result.usage);
612+
return result;
550613
},
551614

552615
stream(prompt: MessageContent, extra?: Partial<GenerateTextOptions>): StreamTextResult {
@@ -567,13 +630,21 @@ export function agent(opts: AgentOptions): Agent {
567630
} else {
568631
streamOpts.messages = [...(streamOpts.messages ?? []), { role: 'user', content: prompt }];
569632
}
570-
return streamText(streamOpts as GenerateTextOptions);
633+
const result = streamText(streamOpts as GenerateTextOptions);
634+
void result.usage
635+
.then((usage) => accumulateUsage(agentUsageTally, usage))
636+
.catch(() => { /* stream errored; usage tally unchanged */ });
637+
return result;
571638
},
572639

573640
session(id?: string): AgentSession {
574641
const sessionId = id ?? crypto.randomUUID();
575642
if (!sessions.has(sessionId)) sessions.set(sessionId, []);
643+
if (!sessionUsageTallies.has(sessionId)) {
644+
sessionUsageTallies.set(sessionId, createEmptyUsageAggregate(sessionId));
645+
}
576646
const history = sessions.get(sessionId)!;
647+
const sessionUsageTally = sessionUsageTallies.get(sessionId)!;
577648

578649
const session = {
579650
id: sessionId,
@@ -640,6 +711,8 @@ export function agent(opts: AgentOptions): Agent {
640711
);
641712

642713
const result = await generateText(wrappedOpts as GenerateTextOptions);
714+
accumulateUsage(sessionUsageTally, result.usage);
715+
accumulateUsage(agentUsageTally, result.usage);
643716

644717
// Validate + parse when a schema was supplied. Native enforcement
645718
// guarantees a valid shape on every successful response, so a
@@ -701,6 +774,12 @@ export function agent(opts: AgentOptions): Agent {
701774
);
702775

703776
const result = streamText(wrappedOpts as GenerateTextOptions);
777+
void result.usage
778+
.then((usage) => {
779+
accumulateUsage(sessionUsageTally, usage);
780+
accumulateUsage(agentUsageTally, usage);
781+
})
782+
.catch(() => { /* stream errored; usage tally unchanged */ });
704783

705784
// Capture text for history when done. Memory observe runs inside
706785
// applyMemoryProvider's onAfterGeneration wrapper so it's not
@@ -723,11 +802,12 @@ export function agent(opts: AgentOptions): Agent {
723802
},
724803

725804
async usage(): Promise<AgentOSUsageAggregate> {
726-
return loadRecordedAgentOSUsage({
805+
const persisted = await loadRecordedAgentOSUsage({
727806
enabled: baseOpts.usageLedger?.enabled,
728807
path: baseOpts.usageLedger?.path,
729808
sessionId,
730809
});
810+
return mergeAggregates(sessionUsageTally, persisted);
731811
},
732812

733813
clear() {
@@ -742,11 +822,17 @@ export function agent(opts: AgentOptions): Agent {
742822
},
743823

744824
async usage(sessionId?: string): Promise<AgentOSUsageAggregate> {
745-
return loadRecordedAgentOSUsage({
825+
const persisted = await loadRecordedAgentOSUsage({
746826
enabled: baseOpts.usageLedger?.enabled,
747827
path: baseOpts.usageLedger?.path,
748828
sessionId,
749829
});
830+
// When a sessionId is requested, only that session's tally is in scope.
831+
// When none is requested, return the agent-wide tally.
832+
const inMemory = sessionId
833+
? sessionUsageTallies.get(sessionId) ?? createEmptyUsageAggregate(sessionId)
834+
: agentUsageTally;
835+
return mergeAggregates(inMemory, persisted);
750836
},
751837

752838
async close() {

src/api/generateObject.ts

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,94 @@ export interface GenerateObjectResult<T> {
208208
*/
209209
const JSON_MODE_PROVIDERS = new Set(['openai', 'openrouter']);
210210

211+
/**
 * Estimate the output-token budget needed to produce a complete JSON object
 * matching the given Zod schema. The estimate scales with field count and
 * nested-array shape so simple schemas use a small budget while nested-array
 * schemas (the historical truncation hot spot) get enough room to finish.
 *
 * Walks the Zod schema directly (handles both v3 internals via `_def.typeName`
 * and v4 internals via a string `_def.type`) so it works regardless of which
 * Zod version the consumer has installed. Because it reads library internals,
 * every access is defensive: an unrecognised or malformed node is budgeted as
 * a single leaf instead of throwing.
 *
 * Wrapper nodes (optional, nullable, default, readonly, effects, branded,
 * lazy, pipeline/pipe, and anything else exposing `_def.innerType`) are
 * looked through so the wrapped schema drives the estimate.
 *
 * @param schema - A Zod schema (v3 or v4). Anything without a `_def` is
 *   treated as a single leaf.
 * @returns A token budget clamped to [512, 8192]. Callers can override
 *   entirely by passing `opts.maxTokens` to {@link generateObject}.
 */
function estimateMaxTokensForZodSchema(schema: any): number {
  const TOKENS_PER_LEAF = 30; // average tokens per primitive field
  const TOKENS_PER_ARRAY_ITEM = 60; // assumed per-element budget for typical strings
  const MIN_BUDGET = 512;
  const MAX_BUDGET = 8192;
  const MAX_DEPTH = 8; // bounds recursion for self-referential schemas

  /**
   * Normalise a node's kind to the v3 spelling ("ZodObject", "ZodArray", ...).
   * Zod v3 uses `_def.typeName`; Zod v4 uses a lowercase string `_def.type`.
   * In v3 some defs carry a *schema* on `_def.type` (e.g. arrays), so only a
   * string value is ever treated as a v4 discriminator.
   */
  function kindOf(def: any): string {
    if (typeof def?.typeName === 'string') return def.typeName;
    const typeV4 = def?.type;
    if (typeof typeV4 === 'string' && typeV4.length > 0) {
      return `Zod${typeV4.charAt(0).toUpperCase()}${typeV4.slice(1)}`;
    }
    return '';
  }

  /** Length of the longest value when stringified; 0 for an empty list. */
  function longestLength(values: unknown[]): number {
    return values.reduce<number>((longest, value) => Math.max(longest, String(value).length), 0);
  }

  function walk(node: any, depth: number): number {
    if (!node || depth > MAX_DEPTH) return TOKENS_PER_LEAF;
    const def = node._def;
    if (!def) return TOKENS_PER_LEAF;

    switch (kindOf(def)) {
      case 'ZodOptional':
      case 'ZodNullable':
      case 'ZodDefault':
      case 'ZodReadonly':
      case 'ZodEffects':
        return walk(def.innerType ?? def.schema, depth + 1);

      case 'ZodBranded':
        // v3 keeps the branded schema itself on `_def.type`.
        return walk(def.type, depth + 1);

      case 'ZodLazy':
        // Resolve the deferred schema; MAX_DEPTH stops self-referential ones.
        return typeof def.getter === 'function' ? walk(def.getter(), depth + 1) : TOKENS_PER_LEAF;

      case 'ZodPipeline':
      case 'ZodPipe':
        // Either side may be the structural schema (the other is typically a
        // transform, which counts as a leaf), so budget for the larger one.
        return Math.max(walk(def.in, depth + 1), walk(def.out, depth + 1));

      case 'ZodObject': {
        // Zod v3: shape is a function returning the shape object.
        // Zod v4: shape is the shape object directly.
        const shapeRaw = def.shape;
        const shape: Record<string, any> = (typeof shapeRaw === 'function' ? shapeRaw() : shapeRaw) ?? {};
        let sum = 64; // braces, commas, base structure overhead
        for (const key of Object.keys(shape)) {
          sum += key.length + 8; // field name + JSON syntax
          sum += walk(shape[key], depth + 1);
        }
        return sum;
      }

      case 'ZodArray': {
        // v4 stores the element schema on def.element, v3 on def.type.
        const inner = def.element ?? def.type;
        const itemBudget = walk(inner, depth + 1);
        // Object items are assumed to be fewer but larger than scalar items.
        const assumedCount = kindOf(inner?._def) === 'ZodObject' ? 6 : 8;
        return 24 + assumedCount * Math.max(itemBudget, TOKENS_PER_ARRAY_ITEM);
      }

      case 'ZodEnum':
      case 'ZodNativeEnum': {
        // v3: def.values (array, or the enum object for native enums).
        // v4: def.entries (object).
        const raw = def.values ?? def.entries ?? {};
        const values: unknown[] = Array.isArray(raw) ? raw : Object.values(raw);
        return values.length > 0 ? longestLength(values) + 4 : TOKENS_PER_LEAF;
      }

      case 'ZodLiteral': {
        // v3 stores a single literal on def.value; v4 stores a list on def.values.
        const literals: unknown[] = Array.isArray(def.values) ? def.values : [def.value ?? ''];
        return longestLength(literals) + 4;
      }

      case 'ZodUnion':
      case 'ZodDiscriminatedUnion': {
        // Only one branch is emitted, so budget for the most expensive one.
        const options: any[] = Array.isArray(def.options) ? def.options : [];
        return options.length > 0
          ? Math.max(...options.map((option) => walk(option, depth + 1)))
          : TOKENS_PER_LEAF;
      }

      default:
        // Unknown kind: look through it if it is a wrapper exposing
        // `innerType` (e.g. catch), otherwise count it as one primitive.
        return def.innerType ? walk(def.innerType, depth + 1) : TOKENS_PER_LEAF;
    }
  }

  const estimate = Math.ceil(walk(schema, 0) * 1.5); // 50% headroom for prose-heavy fields
  return Math.min(MAX_BUDGET, Math.max(MIN_BUDGET, estimate));
}
298+
211299
/**
212300
* Builds the schema-specific instruction text appended to every
213301
* generateObject call. Kept free of caller context so it can be composed
@@ -403,6 +491,13 @@ export async function generateObject<T extends ZodType>(
403491
let lastRawText = '';
404492
let lastValidationError: ZodError | undefined;
405493

494+
// Auto-size the output budget when the caller didn't specify one. Without
495+
// this, complex nested schemas reliably truncate at the provider default
496+
// (256-512 tokens) and JSON.parse fails on the unfinished output. The
497+
// estimate scales with field count and array nesting depth so simple
498+
// schemas don't pay for tokens they won't use.
499+
const effectiveMaxTokens = opts.maxTokens ?? estimateMaxTokensForZodSchema(opts.schema);
500+
406501
// Attempt generation up to 1 + maxRetries times (initial + retries)
407502
for (let attempt = 0; attempt <= maxRetries; attempt++) {
408503
const result = await generateText({
@@ -411,7 +506,7 @@ export async function generateObject<T extends ZodType>(
411506
system: systemPrompt,
412507
messages,
413508
temperature: opts.temperature,
414-
maxTokens: opts.maxTokens,
509+
maxTokens: effectiveMaxTokens,
415510
apiKey: opts.apiKey,
416511
baseUrl: opts.baseUrl,
417512
fallbackProviders: opts.fallbackProviders,

0 commit comments

Comments
 (0)