fix(usage,generateObject): in-memory usage tally + auto-sized output budget

jddunn · jddunn · commit e166147dbf04 · 2026-05-01T18:20:49.000-07:00
Two related issues that broke published examples on a fresh install:

1) `agent.usage()` and `session.usage()` always returned totalTokens=0 because
   the implementation only read from the persisted JSONL ledger, which is
   opt-in (`usageLedger.enabled`). Add a per-session and per-agent in-memory
   accumulator that's populated from `result.usage` after every generate / send
   / stream call, and merge it with the persisted ledger at read time so
   cross-process history continues to roll up correctly.

2) `generateObject()` truncated its output for nested-array schemas (the published
   recipe example reliably failed) because no `maxTokens` was set, so requests fell
   back to the provider default of ~256-512 tokens. Add a Zod-direct schema walker
   that estimates a sensible output budget from field count and array shape,
   clamped to [512, 8192]. Walks both Zod v3 (`_def.typeName`) and v4
   (`_def.type`) internals so it works regardless of which Zod the consumer
   has installed. Honors `opts.maxTokens` when explicitly set.

Verified end-to-end against the agentos.sh published examples in a
fresh-install harness with real OpenAI + Anthropic API keys.
diff --git a/src/api/agent.ts b/src/api/agent.ts
@@ -370,6 +370,60 @@ async function loadRecordedAgentOSUsage(
   return getRecordedAgentOSUsage(options);
 }
 
+/**
+ * Build a usage aggregate with every counter (tokens, cost, calls) at zero; seeds the in-memory tallies.
+ * @param sessionId - Session the aggregate is labelled with; omitted for the agent-wide tally.
+ * @returns A new object on every call, so each tally can be mutated independently of the others.
+ */
+function createEmptyUsageAggregate(sessionId?: string): AgentOSUsageAggregate {
+  return {
+    sessionId, // stays undefined when no session id was supplied
+    personaId: undefined, // never populated by this helper
+    promptTokens: 0,
+    completionTokens: 0,
+    totalTokens: 0,
+    costUSD: 0,
+    calls: 0,
+  };
+}
+
+/**
+ * Fold one generation's token usage into `target`, mutating it in place. A missing `usage` is a
+ * no-op (not counted as a call); otherwise numeric fields are added and `calls` grows by one.
+ */
+function accumulateUsage(
+  target: AgentOSUsageAggregate,
+  usage: { promptTokens?: number; completionTokens?: number; totalTokens?: number; costUSD?: number } | undefined,
+): void {
+  if (!usage) return; // nothing reported: leave every counter, including `calls`, untouched
+  if (typeof usage.promptTokens === 'number') target.promptTokens += usage.promptTokens;
+  if (typeof usage.completionTokens === 'number') target.completionTokens += usage.completionTokens;
+  if (typeof usage.totalTokens === 'number') {
+    target.totalTokens += usage.totalTokens;
+  } else { // no total reported: derive it from prompt + completion, treating absent parts as 0
+    target.totalTokens += (usage.promptTokens ?? 0) + (usage.completionTokens ?? 0);
+  }
+  if (typeof usage.costUSD === 'number') target.costUSD = (target.costUSD ?? 0) + usage.costUSD; // running cost may be unset
+  target.calls += 1;
+}
+
+/**
+ * Combine two aggregates into a new object without mutating either input: counters are summed
+ * (a missing `costUSD` counts as 0) and `sessionId` / `personaId` come from `a`, else from `b`.
+ * NOTE(review): sums blindly — if `b` already includes the calls tallied in `a`, they count twice; confirm at call sites.
+ */
+function mergeAggregates(a: AgentOSUsageAggregate, b: AgentOSUsageAggregate): AgentOSUsageAggregate {
+  return {
+    sessionId: a.sessionId ?? b.sessionId, // prefer a's identity, fall back to b's
+    personaId: a.personaId ?? b.personaId,
+    promptTokens: a.promptTokens + b.promptTokens,
+    completionTokens: a.completionTokens + b.completionTokens,
+    totalTokens: a.totalTokens + b.totalTokens,
+    costUSD: (a.costUSD ?? 0) + (b.costUSD ?? 0),
+    calls: a.calls + b.calls,
+  };
+}
+
 /**
  * Convert HEXACO trait values (0-1) into behavioral descriptions the LLM can act on.
  *
@@ -472,6 +526,13 @@ function buildSystemPrompt(opts: AgentOptions): string | undefined {
  */
 export function agent(opts: AgentOptions): Agent {
   const sessions = new Map<string, Message[]>();
+  // In-memory usage tally per session and per agent. Updated as soon as each
+  // generate/send call returns, and for stream calls once the stream's `usage`
+  // promise resolves, so `agent.usage()` and `session.usage()` work even when
+  // the persisted ledger is disabled (the common case). The persisted ledger is
+  // still merged in at read time so cross-process / historical totals roll up.
+  const sessionUsageTallies = new Map<string, AgentOSUsageAggregate>();
+  const agentUsageTally: AgentOSUsageAggregate = createEmptyUsageAggregate();
   let avatarBindingOverrides: Record<string, unknown> = {};
   const useMemory = opts.memory !== false;
 
@@ -546,7 +607,9 @@ export function agent(opts: AgentOptions): Agent {
       } else {
         genOpts.messages = [...(genOpts.messages ?? []), { role: 'user', content: prompt }];
       }
-      return generateText(genOpts as GenerateTextOptions);
+      const result = await generateText(genOpts as GenerateTextOptions);
+      accumulateUsage(agentUsageTally, result.usage);
+      return result;
     },
 
     stream(prompt: MessageContent, extra?: Partial<GenerateTextOptions>): StreamTextResult {
@@ -567,13 +630,21 @@ export function agent(opts: AgentOptions): Agent {
       } else {
         streamOpts.messages = [...(streamOpts.messages ?? []), { role: 'user', content: prompt }];
       }
-      return streamText(streamOpts as GenerateTextOptions);
+      const result = streamText(streamOpts as GenerateTextOptions);
+      void result.usage
+        .then((usage) => accumulateUsage(agentUsageTally, usage))
+        .catch(() => { /* stream errored; usage tally unchanged */ });
+      return result;
     },
 
     session(id?: string): AgentSession {
       const sessionId = id ?? crypto.randomUUID();
       if (!sessions.has(sessionId)) sessions.set(sessionId, []);
+      if (!sessionUsageTallies.has(sessionId)) {
+        sessionUsageTallies.set(sessionId, createEmptyUsageAggregate(sessionId));
+      }
       const history = sessions.get(sessionId)!;
+      const sessionUsageTally = sessionUsageTallies.get(sessionId)!;
 
       const session = {
         id: sessionId,
@@ -640,6 +711,8 @@ export function agent(opts: AgentOptions): Agent {
           );
 
           const result = await generateText(wrappedOpts as GenerateTextOptions);
+          accumulateUsage(sessionUsageTally, result.usage);
+          accumulateUsage(agentUsageTally, result.usage);
 
           // Validate + parse when a schema was supplied. Native enforcement
           // guarantees a valid shape on every successful response, so a
@@ -701,6 +774,12 @@ export function agent(opts: AgentOptions): Agent {
           );
 
           const result = streamText(wrappedOpts as GenerateTextOptions);
+          void result.usage
+            .then((usage) => {
+              accumulateUsage(sessionUsageTally, usage);
+              accumulateUsage(agentUsageTally, usage);
+            })
+            .catch(() => { /* stream errored; usage tally unchanged */ });
 
           // Capture text for history when done. Memory observe runs inside
           // applyMemoryProvider's onAfterGeneration wrapper so it's not
@@ -723,11 +802,12 @@ export function agent(opts: AgentOptions): Agent {
         },
 
         async usage(): Promise<AgentOSUsageAggregate> {
-          return loadRecordedAgentOSUsage({
+          const persisted = await loadRecordedAgentOSUsage({
             enabled: baseOpts.usageLedger?.enabled,
             path: baseOpts.usageLedger?.path,
             sessionId,
           });
+          return mergeAggregates(sessionUsageTally, persisted);
         },
 
         clear() {
@@ -742,11 +822,17 @@ export function agent(opts: AgentOptions): Agent {
     },
 
     async usage(sessionId?: string): Promise<AgentOSUsageAggregate> {
-      return loadRecordedAgentOSUsage({
+      const persisted = await loadRecordedAgentOSUsage({
         enabled: baseOpts.usageLedger?.enabled,
         path: baseOpts.usageLedger?.path,
         sessionId,
       });
+      // When a sessionId is requested, only that session's tally is in scope.
+      // When none is requested, return the agent-wide tally.
+      const inMemory = sessionId
+        ? sessionUsageTallies.get(sessionId) ?? createEmptyUsageAggregate(sessionId)
+        : agentUsageTally;
+      return mergeAggregates(inMemory, persisted);
     },
 
     async close() {
diff --git a/src/api/generateObject.ts b/src/api/generateObject.ts
@@ -208,6 +208,94 @@ export interface GenerateObjectResult<T> {
  */
 const JSON_MODE_PROVIDERS = new Set(['openai', 'openrouter']);
 
+/**
+ * Estimate the output-token budget needed to produce a complete JSON object
+ * matching the given Zod schema. Objects sum their fields plus a fixed
+ * overhead; arrays multiply an assumed element count (6 for object items,
+ * 8 otherwise) by a per-item budget; unions take their largest option; and
+ * optional/nullable/default/readonly/effects wrappers cost what they wrap.
+ * Enums and literals cost their (longest) value's character length + 4.
+ * Anything unrecognised or nested past depth 8 costs one 30-token leaf.
+ *
+ * Reads Zod internals directly: v3 via `_def.typeName`, v4 via `_def.type`.
+ * @param schema - Zod schema, or any value exposing a Zod-style `_def`.
+ * @returns Walked estimate x 1.5, rounded up, clamped to [512, 8192].
+ */
+function estimateMaxTokensForZodSchema(schema: any): number {
+  const TOKENS_PER_LEAF = 30;       // average tokens per primitive field
+  const TOKENS_PER_ARRAY_ITEM = 60; // assumed per-element budget for typical strings
+  const MIN_BUDGET = 512;
+  const MAX_BUDGET = 8192;
+
+  function walk(node: any, depth: number): number {
+    if (!node || depth > 8) return TOKENS_PER_LEAF; // depth cap bounds the recursion
+    const def = (node as any)?._def;
+    if (!def) return TOKENS_PER_LEAF; // not a Zod-style node: cost it as a plain leaf
+
+    // Zod v3 uses `_def.typeName` ("ZodObject", ...); Zod v4 uses `_def.type` ("object", ...), re-spelled below as "ZodObject" etc.
+    // NOTE(review): the `as string` casts are unchecked — a truthy non-string `_def.type` with no `typeName` would throw at `typeV4[0].toUpperCase()`.
+    const typeNameV3 = def.typeName as string | undefined;
+    const typeV4 = def.type as string | undefined;
+    const kind: string = typeNameV3 ?? (typeV4 ? `Zod${typeV4[0].toUpperCase()}${typeV4.slice(1)}` : '');
+
+    switch (kind) {
+      case 'ZodOptional':
+      case 'ZodNullable':
+      case 'ZodDefault':
+      case 'ZodReadonly':
+      case 'ZodEffects':
+        return walk(def.innerType ?? def.schema, depth + 1); // whichever property holds the wrapped schema
+
+      case 'ZodObject': {
+        // Zod v3: shape is a function returning the shape object.
+        // Zod v4: shape is the shape object directly.
+        const shapeRaw = def.shape;
+        const shape: Record<string, any> = typeof shapeRaw === 'function' ? shapeRaw() : shapeRaw ?? {};
+        let sum = 64; // braces, commas, base structure overhead
+        for (const key of Object.keys(shape)) {
+          sum += key.length + 8;        // field name + JSON syntax
+          sum += walk(shape[key], depth + 1);
+        }
+        return sum;
+      }
+
+      case 'ZodArray': {
+        // v3 stores element on def.type (a Zod schema), v4 on def.element.
+        const inner = def.element ?? def.type;
+        const itemBudget = walk(inner, depth + 1);
+        const innerKind = inner?._def?.typeName ?? inner?._def?.type; // raw kind of the immediate element only (wrappers are not unwrapped)
+        const isObjectItem = innerKind === 'ZodObject' || innerKind === 'object';
+        const assumedCount = isObjectItem ? 6 : 8; // assume fewer items when each one is an object
+        return 24 + assumedCount * Math.max(itemBudget, TOKENS_PER_ARRAY_ITEM); // fixed 24 on top; each item is budgeted at least TOKENS_PER_ARRAY_ITEM
+      }
+
+      case 'ZodEnum':
+      case 'ZodNativeEnum': {
+        const values = def.values ?? Object.values(def.entries ?? {}); // falls back to `entries` when `values` is absent
+        const arr = Array.isArray(values) ? values : Object.values(values);
+        return arr.length > 0 ? Math.max(...arr.map((v: unknown) => String(v).length)) + 4 : TOKENS_PER_LEAF; // longest value's character length + 4; an empty enum costs one leaf
+      }
+
+      case 'ZodLiteral':
+        return String(def.value ?? '').length + 4; // reads `def.value` only — NOTE(review): confirm Zod v4 literals expose it
+
+      case 'ZodUnion':
+      case 'ZodDiscriminatedUnion': {
+        const opts = (def.options ?? []) as any[];
+        return opts.length > 0 ? Math.max(...opts.map((o) => walk(o, depth + 1))) : TOKENS_PER_LEAF; // budget for the most expensive option
+      }
+
+      default:
+        return TOKENS_PER_LEAF;
+    }
+  }
+
+  const estimate = Math.ceil(walk(schema, 0) * 1.5); // 50% headroom for prose-heavy fields
+  if (estimate < MIN_BUDGET) return MIN_BUDGET;
+  if (estimate > MAX_BUDGET) return MAX_BUDGET;
+  return estimate;
+}
+
 /**
  * Builds the schema-specific instruction text appended to every
  * generateObject call. Kept free of caller context so it can be composed
@@ -403,6 +491,13 @@ export async function generateObject<T extends ZodType>(
   let lastRawText = '';
   let lastValidationError: ZodError | undefined;
 
+  // Auto-size the output budget when the caller didn't specify one. Without
+  // this, complex nested schemas reliably truncate at the provider default
+  // (256-512 tokens) and JSON.parse fails on the unfinished output. The
+  // estimate scales with field count and array nesting depth so simple
+  // schemas don't pay for tokens they won't use.
+  const effectiveMaxTokens = opts.maxTokens ?? estimateMaxTokensForZodSchema(opts.schema);
+
   // Attempt generation up to 1 + maxRetries times (initial + retries)
   for (let attempt = 0; attempt <= maxRetries; attempt++) {
     const result = await generateText({
@@ -411,7 +506,7 @@ export async function generateObject<T extends ZodType>(
       system: systemPrompt,
       messages,
       temperature: opts.temperature,
-      maxTokens: opts.maxTokens,
+      maxTokens: effectiveMaxTokens,
       apiKey: opts.apiKey,
       baseUrl: opts.baseUrl,
       fallbackProviders: opts.fallbackProviders,