fix(structured-output): address coderabbit findings on session.send overload

jddunn · jddunn · commit b5da21032723 · 2026-04-25T23:32:34.000-07:00
Major:
- agent.ts: strip caller tools when responseSchema is set; console.warn
  on every send that drops them (no de-duplication).
  Mixing tools with native structured output breaks Anthropic forced
  tool_use (schema tool reserves the slot) and OpenAI json_schema mode.
- agent.ts: resolveProviderForStructuredOutput trims and rejects empty
  parsed heads from malformed model strings (':gpt-4o', '  :foo').
- agent.ts: wrap JSON.parse + Zod validation in try/catch and re-throw
  ObjectGenerationError uniformly with raw text + cause attached.
- AnthropicProvider: stop merging caller tools with the schema tool.
  The schema tool is the only tool when _agentosUseToolForStructuredOutput
  is set; mixing them produced unpredictable model behavior.
- MigrationRunner._postgresHasColumn: scope to current_schema() to match
  _postgresTableExists. Prevents a same-named table and column in another
  schema from making the probe falsely return true and skipping the
  actual ALTER.
- postgresPasswordRedaction: URL form parses via WHATWG URL so a password
  containing '@' is redacted in full; the old regex stopped at the first
  '@' and leaked the remainder. Quoted keyword form supports doubled-quote
  and backslash escapes inside the quoted password value.
- CHANGELOG: drop the 'closes [hi#relevance]' release-please placeholder
  that leaked into the rendered changelog.

Tests:
- structuredOutputFormat.test.ts: convert from node:test to vitest so it
  runs in the agentos vitest suite.
- agent.test.ts: add 7 structured-output cases covering the no-schema
  regression guard, typed object return, _responseFormat plumbing, tools
  stripping + warn, non-JSON throw, Zod-fail throw, and memory across
  schema-aware sends.

Specs/plan:
- spec §4.2 + plan Task 6.3 reflect the shipped impl: tool-stripping IIFE,
  try/catch around parse + validate, helper rename to
  resolveProviderForStructuredOutput, ObjectGenerationError(message,
  rawText, cause?) constructor signature.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,7 +21,7 @@ opt-in via config.typedNetwork.extractAtEncode (default false).
 * feat(AnthropicProvider): forced tool-use for schema-enforced structured output ([0ba00b9](https://github.com/framersai/agentos/commit/0ba00b9))
 * feat(GeminiProvider): responseSchema for schema-enforced structured output ([b5e1bcb](https://github.com/framersai/agentos/commit/b5e1bcb))
 * feat(memory): add subpath export for typed-network module ([db9ea8b](https://github.com/framersai/agentos/commit/db9ea8b))
-* feat(memory): Stage E Phase 4.3 - retrieve() runs typed spreading activation ([d0ab11c](https://github.com/framersai/agentos/commit/d0ab11c)), closes [hi#relevance](https://github.com/hi/issues/relevance)
+* feat(memory): Stage E Phase 4.3 - retrieve() runs typed spreading activation ([d0ab11c](https://github.com/framersai/agentos/commit/d0ab11c))
 
 ## <small>0.3.4 (2026-04-26)</small>
 
diff --git a/src/api/agent.ts b/src/api/agent.ts
@@ -345,7 +345,10 @@ export interface Agent {
 function resolveProviderForStructuredOutput(opts: Partial<GenerateTextOptions>): string {
   if (opts.provider) return opts.provider;
   if (typeof opts.model === 'string' && opts.model.includes(':')) {
-    return opts.model.split(':', 1)[0]!;
+    // Trim normalizes padded heads like "  openai:gpt-4". A head that is
+    // empty after trimming (":gpt-4o", "  :foo") falls back to the default.
+    const head = opts.model.split(':', 1)[0]?.trim();
+    if (head) return head;
   }
   return 'openai';
 }
@@ -598,9 +601,30 @@ export function agent(opts: AgentOptions): Agent {
             });
           }
 
+          // Schema-aware calls disable tools. Mixing native structured
+          // output with tool-calling requires a multi-turn schema+tool
+          // protocol that this overload doesn't speak. Anthropic's
+          // forced tool-use mode reserves the tool slot for the schema
+          // tool, and OpenAI's json_schema mode forbids tools alongside.
+          // Strip caller-provided tools when responseSchema is set;
+          // surface a console.warn so the caller can adjust if they
+          // meant to pass both. (toolChoice is not part of
+          // GenerateTextOptions; tools is the only public surface here.)
+          const baseForRequest: Partial<GenerateTextOptions> = sendOpts?.responseSchema
+            ? (() => {
+                if (baseOpts.tools !== undefined) {
+                  console.warn(
+                    '[agentos] session.send: tools are ignored when responseSchema is set. Use generateObject for one-shot schema calls or call send() without a schema for tool-loop calls.',
+                  );
+                }
+                const { tools: _tools, ...rest } = baseOpts;
+                return rest;
+              })()
+            : baseOpts;
+
           const wrappedOpts = applyMemoryProvider(
             {
-              ...baseOpts,
+              ...baseForRequest,
               messages: requestMessages,
               usageLedger: mergeUsageLedgerOptions(baseOpts.usageLedger, {
                 sessionId,
diff --git a/src/api/runtime/__tests__/agent.test.ts b/src/api/runtime/__tests__/agent.test.ts
@@ -1,4 +1,5 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { z } from 'zod';
 
 const hoisted = vi.hoisted(() => ({
   generateText: vi.fn(),
@@ -228,3 +229,154 @@ describe('agent', () => {
     expect(callArgs?.maxTokens).toBeUndefined();
   });
 });
+
+describe('agent session.send: structured output (responseSchema)', () => {
+  beforeEach(() => {
+    hoisted.generateText.mockReset();
+    hoisted.streamText.mockReset();
+    hoisted.getRecordedAgentOSUsage.mockReset();
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  const Decision = z.object({
+    verdict: z.enum(['yes', 'no']),
+    confidence: z.number().min(0).max(1),
+  });
+
+  it('returns plain GenerateTextResult when responseSchema is omitted (regression guard)', async () => {
+    hoisted.generateText.mockResolvedValueOnce({
+      provider: 'openai',
+      model: 'gpt-4.1-mini',
+      text: 'plain reply',
+      usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
+      toolCalls: [],
+      finishReason: 'stop',
+    });
+    const assistant = agent({ model: 'openai:gpt-4.1-mini' });
+    const session = assistant.session('demo');
+    const r = await session.send('hi');
+    expect(r.text).toBe('plain reply');
+    expect('object' in r).toBe(false);
+  });
+
+  it('returns typed object alongside text when responseSchema is set', async () => {
+    hoisted.generateText.mockResolvedValueOnce({
+      provider: 'openai',
+      model: 'gpt-4.1-mini',
+      text: '{"verdict":"yes","confidence":0.92}',
+      usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
+      toolCalls: [],
+      finishReason: 'stop',
+    });
+    const assistant = agent({ model: 'openai:gpt-4.1-mini' });
+    const session = assistant.session('demo');
+    const r = await session.send('decide', { responseSchema: Decision });
+    expect(r.object).toEqual({ verdict: 'yes', confidence: 0.92 });
+    expect(r.text).toBe('{"verdict":"yes","confidence":0.92}');
+  });
+
+  it('forwards _responseFormat to generateText when responseSchema is set (openai → json_schema)', async () => {
+    hoisted.generateText.mockResolvedValueOnce({
+      provider: 'openai',
+      model: 'gpt-4.1-mini',
+      text: '{"verdict":"yes","confidence":0.5}',
+      usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
+      toolCalls: [],
+      finishReason: 'stop',
+    });
+    const assistant = agent({ model: 'openai:gpt-4.1-mini' });
+    await assistant.session('demo').send('decide', {
+      responseSchema: Decision,
+      schemaName: 'Decision',
+    });
+    const callArgs = hoisted.generateText.mock.calls.at(-1)?.[0];
+    expect(callArgs?._responseFormat).toMatchObject({
+      type: 'json_schema',
+      json_schema: { name: 'Decision', strict: true },
+    });
+  });
+
+  it('strips caller-provided tools when responseSchema is set and warns once', async () => {
+    hoisted.generateText.mockResolvedValueOnce({
+      provider: 'openai',
+      model: 'gpt-4.1-mini',
+      text: '{"verdict":"no","confidence":0.1}',
+      usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
+      toolCalls: [],
+      finishReason: 'stop',
+    });
+    const warn = vi.spyOn(console, 'warn').mockImplementation(() => {});
+    const fakeTool = {
+      stub: {
+        description: 'stub',
+        inputSchema: { type: 'object' as const, properties: {} },
+        execute: async () => ({ success: true, output: {} }),
+      },
+    };
+    const assistant = agent({ model: 'openai:gpt-4.1-mini', tools: fakeTool as any });
+    await assistant.session('demo').send('decide', { responseSchema: Decision });
+    const callArgs = hoisted.generateText.mock.calls.at(-1)?.[0];
+    expect(callArgs?.tools).toBeUndefined();
+    expect(warn).toHaveBeenCalledWith(
+      expect.stringContaining('tools are ignored when responseSchema is set'),
+    );
+  });
+
+  it('throws ObjectGenerationError when provider returns non-JSON text', async () => {
+    hoisted.generateText.mockResolvedValueOnce({
+      provider: 'openai',
+      model: 'gpt-4.1-mini',
+      text: 'not json at all',
+      usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
+      toolCalls: [],
+      finishReason: 'stop',
+    });
+    const assistant = agent({ model: 'openai:gpt-4.1-mini' });
+    await expect(
+      assistant.session('demo').send('decide', { responseSchema: Decision }),
+    ).rejects.toThrow(/not valid JSON/);
+  });
+
+  it('throws ObjectGenerationError when JSON fails Zod validation', async () => {
+    hoisted.generateText.mockResolvedValueOnce({
+      provider: 'openai',
+      model: 'gpt-4.1-mini',
+      text: '{"verdict":"maybe","confidence":2}',
+      usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
+      toolCalls: [],
+      finishReason: 'stop',
+    });
+    const assistant = agent({ model: 'openai:gpt-4.1-mini' });
+    await expect(
+      assistant.session('demo').send('decide', { responseSchema: Decision }),
+    ).rejects.toThrow(/Zod validation/);
+  });
+
+  it('preserves session memory across schema-aware sends', async () => {
+    hoisted.generateText.mockResolvedValue({
+      provider: 'openai',
+      model: 'gpt-4.1-mini',
+      text: '{"verdict":"yes","confidence":0.8}',
+      usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
+      toolCalls: [],
+      finishReason: 'stop',
+    });
+    const assistant = agent({ model: 'openai:gpt-4.1-mini' });
+    const session = assistant.session('demo');
+    await session.send('first', { responseSchema: Decision });
+    await session.send('second', { responseSchema: Decision });
+    expect(hoisted.generateText).toHaveBeenNthCalledWith(
+      2,
+      expect.objectContaining({
+        messages: [
+          { role: 'user', content: 'first' },
+          { role: 'assistant', content: '{"verdict":"yes","confidence":0.8}' },
+          { role: 'user', content: 'second' },
+        ],
+      }),
+    );
+  });
+});
diff --git a/src/core/llm/providers/__tests__/structuredOutputFormat.test.ts b/src/core/llm/providers/__tests__/structuredOutputFormat.test.ts
@@ -3,8 +3,7 @@
  * @description Tests for the provider-format adapter that maps a Zod schema
  *              + provider id to the per-provider structured-output payload.
  */
-import { describe, it } from 'node:test';
-import assert from 'node:assert/strict';
+import { describe, it, expect } from 'vitest';
 import { z } from 'zod';
 import { buildResponseFormat } from '../structuredOutputFormat.js';
 
@@ -16,54 +15,54 @@ const schema = z.object({
 describe('buildResponseFormat', () => {
   it('openai returns json_schema with strict=true and a sanitized name', () => {
     const r = buildResponseFormat({ provider: 'openai', schema, schemaName: 'My.Schema' });
-    assert.equal((r as any).type, 'json_schema');
-    assert.equal((r as any).json_schema.name, 'My_Schema');
-    assert.equal((r as any).json_schema.strict, true);
-    assert.equal(typeof (r as any).json_schema.schema, 'object');
+    expect((r as any).type).toBe('json_schema');
+    expect((r as any).json_schema.name).toBe('My_Schema');
+    expect((r as any).json_schema.strict).toBe(true);
+    expect(typeof (r as any).json_schema.schema).toBe('object');
   });
 
   it('anthropic returns the _agentosUseToolForStructuredOutput marker plus tool shape', () => {
     const r = buildResponseFormat({ provider: 'anthropic', schema, schemaName: 'X' });
-    assert.equal((r as any)._agentosUseToolForStructuredOutput, true);
-    assert.equal((r as any).tool.name, 'X');
-    assert.equal(typeof (r as any).tool.input_schema, 'object');
+    expect((r as any)._agentosUseToolForStructuredOutput).toBe(true);
+    expect((r as any).tool.name).toBe('X');
+    expect(typeof (r as any).tool.input_schema).toBe('object');
   });
 
   it('gemini returns json_object with _gemini.responseSchema populated', () => {
     const r = buildResponseFormat({ provider: 'gemini', schema, schemaName: 'X' });
-    assert.equal((r as any).type, 'json_object');
-    assert.equal(typeof (r as any)._gemini.responseSchema, 'object');
+    expect((r as any).type).toBe('json_object');
+    expect(typeof (r as any)._gemini.responseSchema).toBe('object');
   });
 
   it('gemini-cli is treated like gemini', () => {
     const r = buildResponseFormat({ provider: 'gemini-cli', schema, schemaName: 'X' });
-    assert.equal((r as any).type, 'json_object');
-    assert.equal(typeof (r as any)._gemini.responseSchema, 'object');
+    expect((r as any).type).toBe('json_object');
+    expect(typeof (r as any)._gemini.responseSchema).toBe('object');
   });
 
   it('openrouter degrades to bare json_object (no enforcement available)', () => {
     const r = buildResponseFormat({ provider: 'openrouter', schema, schemaName: 'X' });
-    assert.deepEqual(r, { type: 'json_object' });
+    expect(r).toEqual({ type: 'json_object' });
   });
 
   it('unknown provider degrades to bare json_object', () => {
     const r = buildResponseFormat({ provider: 'fictional', schema, schemaName: 'X' });
-    assert.deepEqual(r, { type: 'json_object' });
+    expect(r).toEqual({ type: 'json_object' });
   });
 
   it('schemaName: replaces non-word chars with underscore', () => {
     const r = buildResponseFormat({ provider: 'openai', schema, schemaName: 'a.b/c d!' });
-    assert.equal((r as any).json_schema.name, 'a_b_c_d_');
+    expect((r as any).json_schema.name).toBe('a_b_c_d_');
   });
 
   it('schemaName: truncates to 64 chars', () => {
     const long = 'a'.repeat(80);
     const r = buildResponseFormat({ provider: 'openai', schema, schemaName: long });
-    assert.equal(((r as any).json_schema.name as string).length, 64);
+    expect(((r as any).json_schema.name as string).length).toBe(64);
   });
 
   it('schemaName: empty after sanitization falls back to "response"', () => {
     const r = buildResponseFormat({ provider: 'openai', schema, schemaName: '!!!' });
-    assert.equal((r as any).json_schema.name, 'response');
+    expect((r as any).json_schema.name).toBe('response');
   });
 });
diff --git a/src/core/llm/providers/implementations/AnthropicProvider.ts b/src/core/llm/providers/implementations/AnthropicProvider.ts
@@ -875,11 +875,15 @@ export class AnthropicProvider implements IProvider {
       | { _agentosUseToolForStructuredOutput?: boolean; tool?: { name: string; input_schema: Record<string, unknown> } }
       | undefined;
     if (sf?._agentosUseToolForStructuredOutput && sf.tool) {
-      const existingTools = (payload.tools as Array<Record<string, unknown>>) ?? [];
-      payload.tools = [
-        { name: sf.tool.name, input_schema: sf.tool.input_schema },
-        ...existingTools,
-      ];
+      // Schema-aware mode reserves the tool slot for the schema tool;
+      // mixing structured output with caller-provided tools requires a
+      // multi-turn protocol the session.send overload doesn't speak.
+      // The caller path (AgentSession.send) already strips its tools
+      // before reaching us; this drop is the second line of defense
+      // against a direct provider.generateCompletion call that
+      // accidentally passes both responseFormat (structured-output
+      // marker) and a tools array.
+      payload.tools = [{ name: sf.tool.name, input_schema: sf.tool.input_schema }];
       payload.tool_choice = { type: 'tool', name: sf.tool.name };
     }
 
diff --git a/src/memory/retrieval/store/migrations/MigrationRunner.ts b/src/memory/retrieval/store/migrations/MigrationRunner.ts
@@ -171,10 +171,17 @@ export class MigrationRunner {
     table: string,
     column: string,
   ): Promise<boolean> {
+    // Scope to current_schema() to match _postgresTableExists. Without
+    // the schema filter, a table with the same name in another schema
+    // (e.g. a previous test deployment in another search-path entry)
+    // would falsely return true and skip the actual ALTER TABLE this
+    // probe gates.
     const row = await adapter.get<{ exists: boolean }>(
       `SELECT EXISTS (
          SELECT 1 FROM information_schema.columns
-          WHERE table_name = $1 AND column_name = $2
+          WHERE table_schema = current_schema()
+            AND table_name = $1
+            AND column_name = $2
        ) AS exists`,
       [table, column],
     );
diff --git a/src/memory/retrieval/store/postgresPasswordRedaction.ts b/src/memory/retrieval/store/postgresPasswordRedaction.ts
@@ -15,11 +15,39 @@
  * - Connection strings without an embedded password pass through unchanged.
  */
 export function redactPostgresPassword(connStr: string): string {
+  let safe = connStr;
+
   // URL form: postgresql://user:password@host/db
-  let safe = connStr.replace(/(:\/\/[^:]+:)[^@]+(@)/, '$1***$2');
-  // Quoted keyword form: password='...' or password="..."
-  safe = safe.replace(/(password\s*=\s*)'[^']*'/gi, "$1'***'");
-  safe = safe.replace(/(password\s*=\s*)"[^"]*"/gi, '$1"***"');
+  // Use the URL parser so passwords containing '@' are handled correctly
+  // (the regex approach split at the wrong '@' for passwords like
+  // 'p@ss@word'). Falls back to the regex for non-URL inputs (keyword
+  // form below) since `new URL` rejects them.
+  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(connStr)) {
+    try {
+      const url = new URL(connStr);
+      if (url.password) {
+        url.password = '***';
+        safe = url.toString();
+      }
+    } catch {
+      // Malformed URL — skip the URL path. The keyword regexes below do not
+      // match URL userinfo, so any embedded password passes through as-is.
+    }
+  }
+
+  // Quoted keyword form. Inner pattern admits doubled-quote (SQL-literal
+  // style) and backslash escapes (libpq conninfo style) inside quoted
+  // values so the matcher doesn't terminate early on a literal escaped
+  // quote in the password (e.g. password='a''b' or password='a\'b'), which
+  // would otherwise leak the trailing fragment past the supposed closer.
+  safe = safe.replace(
+    /(password\s*=\s*)'(?:''|\\'|[^'])*'/gi,
+    "$1'***'",
+  );
+  safe = safe.replace(
+    /(password\s*=\s*)"(?:""|\\"|[^"])*"/gi,
+    '$1"***"',
+  );
   // Bare keyword form: password=token (whitespace- or end-terminated)
   safe = safe.replace(/(password\s*=\s*)[^\s'"]+/gi, '$1***');
   return safe;