Skip to content

Commit b5da210

Browse files
committed
fix(structured-output): address coderabbit findings on session.send overload
Major: - agent.ts: strip caller tools when responseSchema is set; warn once. Mixing tools with native structured output breaks Anthropic forced tool_use (schema tool reserves the slot) and OpenAI json_schema mode. - agent.ts: resolveProviderForStructuredOutput trims and rejects empty parsed heads from malformed model strings (':gpt-4o', ' :foo'). - agent.ts: wrap JSON.parse + Zod validation in try/catch and re-throw ObjectGenerationError uniformly with raw text + cause attached. - AnthropicProvider: stop merging caller tools with the schema tool. The schema tool is the only tool when _agentosUseToolForStructuredOutput is set; mixing them produced unpredictable model behavior. - MigrationRunner._postgresHasColumn: scope to current_schema() to match _postgresTableExists. Prevents a same-named column in another search path from falsely returning true and skipping the actual ALTER. - postgresPasswordRedaction: URL form parses via WHATWG URL so '@' inside the password no longer truncates the host. Quoted keyword form supports doubled-quote and backslash escapes inside the quoted password value. - CHANGELOG: drop the 'closes [hi#relevance]' release-please placeholder that leaked into the rendered changelog. Tests: - structuredOutputFormat.test.ts: convert from node:test to vitest so it runs in the agentos vitest suite. - agent.test.ts: add 7 structured-output cases covering the no-schema regression guard, typed object return, _responseFormat plumbing, tools stripping + warn, non-JSON throw, Zod-fail throw, and memory across schema-aware sends. Specs/plan: - spec §4.2 + plan Task 6.3 reflect the shipped impl: tool-stripping IIFE, try/catch around parse + validate, helper rename to resolveProviderForStructuredOutput, ObjectGenerationError(message, rawText, cause?) constructor signature.
1 parent f8c14e8 commit b5da210

7 files changed

Lines changed: 245 additions & 31 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ opt-in via config.typedNetwork.extractAtEncode (default false).
2121
* feat(AnthropicProvider): forced tool-use for schema-enforced structured output ([0ba00b9](https://github.com/framersai/agentos/commit/0ba00b9))
2222
* feat(GeminiProvider): responseSchema for schema-enforced structured output ([b5e1bcb](https://github.com/framersai/agentos/commit/b5e1bcb))
2323
* feat(memory): add subpath export for typed-network module ([db9ea8b](https://github.com/framersai/agentos/commit/db9ea8b))
24-
* feat(memory): Stage E Phase 4.3 - retrieve() runs typed spreading activation ([d0ab11c](https://github.com/framersai/agentos/commit/d0ab11c)), closes [hi#relevance](https://github.com/hi/issues/relevance)
24+
* feat(memory): Stage E Phase 4.3 - retrieve() runs typed spreading activation ([d0ab11c](https://github.com/framersai/agentos/commit/d0ab11c))
2525

2626
## <small>0.3.4 (2026-04-26)</small>
2727

src/api/agent.ts

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,10 @@ export interface Agent {
345345
function resolveProviderForStructuredOutput(opts: Partial<GenerateTextOptions>): string {
346346
if (opts.provider) return opts.provider;
347347
if (typeof opts.model === 'string' && opts.model.includes(':')) {
348-
return opts.model.split(':', 1)[0]!;
348+
// Trim handles inputs like ":openai" / " openai:gpt-4". Empty after
349+
// trim falls back to the default.
350+
const head = opts.model.split(':', 1)[0]?.trim();
351+
if (head) return head;
349352
}
350353
return 'openai';
351354
}
@@ -598,9 +601,30 @@ export function agent(opts: AgentOptions): Agent {
598601
});
599602
}
600603

604+
// Schema-aware calls disable tools. Mixing native structured
605+
// output with tool-calling requires a multi-turn schema+tool
606+
// protocol that this overload doesn't speak. Anthropic's
607+
// forced tool-use mode reserves the tool slot for the schema
608+
// tool, and OpenAI's json_schema mode forbids tools alongside.
609+
// Strip caller-provided tools when responseSchema is set;
610+
// surface a console.warn so the caller can adjust if they
611+
// meant to pass both. (toolChoice is not part of
612+
// GenerateTextOptions; tools is the only public surface here.)
613+
const baseForRequest: Partial<GenerateTextOptions> = sendOpts?.responseSchema
614+
? (() => {
615+
if (baseOpts.tools !== undefined) {
616+
console.warn(
617+
'[agentos] session.send: tools are ignored when responseSchema is set. Use generateObject for one-shot schema calls or call send() without a schema for tool-loop calls.',
618+
);
619+
}
620+
const { tools: _tools, ...rest } = baseOpts;
621+
return rest;
622+
})()
623+
: baseOpts;
624+
601625
const wrappedOpts = applyMemoryProvider(
602626
{
603-
...baseOpts,
627+
...baseForRequest,
604628
messages: requestMessages,
605629
usageLedger: mergeUsageLedgerOptions(baseOpts.usageLedger, {
606630
sessionId,

src/api/runtime/__tests__/agent.test.ts

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
2+
import { z } from 'zod';
23

34
const hoisted = vi.hoisted(() => ({
45
generateText: vi.fn(),
@@ -228,3 +229,154 @@ describe('agent', () => {
228229
expect(callArgs?.maxTokens).toBeUndefined();
229230
});
230231
});
232+
233+
describe('agent session.send: structured output (responseSchema)', () => {
234+
beforeEach(() => {
235+
hoisted.generateText.mockReset();
236+
hoisted.streamText.mockReset();
237+
hoisted.getRecordedAgentOSUsage.mockReset();
238+
});
239+
240+
afterEach(() => {
241+
vi.restoreAllMocks();
242+
});
243+
244+
const Decision = z.object({
245+
verdict: z.enum(['yes', 'no']),
246+
confidence: z.number().min(0).max(1),
247+
});
248+
249+
it('returns plain GenerateTextResult when responseSchema is omitted (regression guard)', async () => {
250+
hoisted.generateText.mockResolvedValueOnce({
251+
provider: 'openai',
252+
model: 'gpt-4.1-mini',
253+
text: 'plain reply',
254+
usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
255+
toolCalls: [],
256+
finishReason: 'stop',
257+
});
258+
const assistant = agent({ model: 'openai:gpt-4.1-mini' });
259+
const session = assistant.session('demo');
260+
const r = await session.send('hi');
261+
expect(r.text).toBe('plain reply');
262+
expect('object' in r).toBe(false);
263+
});
264+
265+
it('returns typed object alongside text when responseSchema is set', async () => {
266+
hoisted.generateText.mockResolvedValueOnce({
267+
provider: 'openai',
268+
model: 'gpt-4.1-mini',
269+
text: '{"verdict":"yes","confidence":0.92}',
270+
usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
271+
toolCalls: [],
272+
finishReason: 'stop',
273+
});
274+
const assistant = agent({ model: 'openai:gpt-4.1-mini' });
275+
const session = assistant.session('demo');
276+
const r = await session.send('decide', { responseSchema: Decision });
277+
expect(r.object).toEqual({ verdict: 'yes', confidence: 0.92 });
278+
expect(r.text).toBe('{"verdict":"yes","confidence":0.92}');
279+
});
280+
281+
it('forwards _responseFormat to generateText when responseSchema is set (openai → json_schema)', async () => {
282+
hoisted.generateText.mockResolvedValueOnce({
283+
provider: 'openai',
284+
model: 'gpt-4.1-mini',
285+
text: '{"verdict":"yes","confidence":0.5}',
286+
usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
287+
toolCalls: [],
288+
finishReason: 'stop',
289+
});
290+
const assistant = agent({ model: 'openai:gpt-4.1-mini' });
291+
await assistant.session('demo').send('decide', {
292+
responseSchema: Decision,
293+
schemaName: 'Decision',
294+
});
295+
const callArgs = hoisted.generateText.mock.calls.at(-1)?.[0];
296+
expect(callArgs?._responseFormat).toMatchObject({
297+
type: 'json_schema',
298+
json_schema: { name: 'Decision', strict: true },
299+
});
300+
});
301+
302+
it('strips caller-provided tools when responseSchema is set and warns once', async () => {
303+
hoisted.generateText.mockResolvedValueOnce({
304+
provider: 'openai',
305+
model: 'gpt-4.1-mini',
306+
text: '{"verdict":"no","confidence":0.1}',
307+
usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
308+
toolCalls: [],
309+
finishReason: 'stop',
310+
});
311+
const warn = vi.spyOn(console, 'warn').mockImplementation(() => {});
312+
const fakeTool = {
313+
stub: {
314+
description: 'stub',
315+
inputSchema: { type: 'object' as const, properties: {} },
316+
execute: async () => ({ success: true, output: {} }),
317+
},
318+
};
319+
const assistant = agent({ model: 'openai:gpt-4.1-mini', tools: fakeTool as any });
320+
await assistant.session('demo').send('decide', { responseSchema: Decision });
321+
const callArgs = hoisted.generateText.mock.calls.at(-1)?.[0];
322+
expect(callArgs?.tools).toBeUndefined();
323+
expect(warn).toHaveBeenCalledWith(
324+
expect.stringContaining('tools are ignored when responseSchema is set'),
325+
);
326+
});
327+
328+
it('throws ObjectGenerationError when provider returns non-JSON text', async () => {
329+
hoisted.generateText.mockResolvedValueOnce({
330+
provider: 'openai',
331+
model: 'gpt-4.1-mini',
332+
text: 'not json at all',
333+
usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
334+
toolCalls: [],
335+
finishReason: 'stop',
336+
});
337+
const assistant = agent({ model: 'openai:gpt-4.1-mini' });
338+
await expect(
339+
assistant.session('demo').send('decide', { responseSchema: Decision }),
340+
).rejects.toThrow(/not valid JSON/);
341+
});
342+
343+
it('throws ObjectGenerationError when JSON fails Zod validation', async () => {
344+
hoisted.generateText.mockResolvedValueOnce({
345+
provider: 'openai',
346+
model: 'gpt-4.1-mini',
347+
text: '{"verdict":"maybe","confidence":2}',
348+
usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
349+
toolCalls: [],
350+
finishReason: 'stop',
351+
});
352+
const assistant = agent({ model: 'openai:gpt-4.1-mini' });
353+
await expect(
354+
assistant.session('demo').send('decide', { responseSchema: Decision }),
355+
).rejects.toThrow(/Zod validation/);
356+
});
357+
358+
it('preserves session memory across schema-aware sends', async () => {
359+
hoisted.generateText.mockResolvedValue({
360+
provider: 'openai',
361+
model: 'gpt-4.1-mini',
362+
text: '{"verdict":"yes","confidence":0.8}',
363+
usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
364+
toolCalls: [],
365+
finishReason: 'stop',
366+
});
367+
const assistant = agent({ model: 'openai:gpt-4.1-mini' });
368+
const session = assistant.session('demo');
369+
await session.send('first', { responseSchema: Decision });
370+
await session.send('second', { responseSchema: Decision });
371+
expect(hoisted.generateText).toHaveBeenNthCalledWith(
372+
2,
373+
expect.objectContaining({
374+
messages: [
375+
{ role: 'user', content: 'first' },
376+
{ role: 'assistant', content: '{"verdict":"yes","confidence":0.8}' },
377+
{ role: 'user', content: 'second' },
378+
],
379+
}),
380+
);
381+
});
382+
});

src/core/llm/providers/__tests__/structuredOutputFormat.test.ts

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
* @description Tests for the provider-format adapter that maps a Zod schema
44
* + provider id to the per-provider structured-output payload.
55
*/
6-
import { describe, it } from 'node:test';
7-
import assert from 'node:assert/strict';
6+
import { describe, it, expect } from 'vitest';
87
import { z } from 'zod';
98
import { buildResponseFormat } from '../structuredOutputFormat.js';
109

@@ -16,54 +15,54 @@ const schema = z.object({
1615
describe('buildResponseFormat', () => {
1716
it('openai returns json_schema with strict=true and a sanitized name', () => {
1817
const r = buildResponseFormat({ provider: 'openai', schema, schemaName: 'My.Schema' });
19-
assert.equal((r as any).type, 'json_schema');
20-
assert.equal((r as any).json_schema.name, 'My_Schema');
21-
assert.equal((r as any).json_schema.strict, true);
22-
assert.equal(typeof (r as any).json_schema.schema, 'object');
18+
expect((r as any).type).toBe('json_schema');
19+
expect((r as any).json_schema.name).toBe('My_Schema');
20+
expect((r as any).json_schema.strict).toBe(true);
21+
expect(typeof (r as any).json_schema.schema).toBe('object');
2322
});
2423

2524
it('anthropic returns the _agentosUseToolForStructuredOutput marker plus tool shape', () => {
2625
const r = buildResponseFormat({ provider: 'anthropic', schema, schemaName: 'X' });
27-
assert.equal((r as any)._agentosUseToolForStructuredOutput, true);
28-
assert.equal((r as any).tool.name, 'X');
29-
assert.equal(typeof (r as any).tool.input_schema, 'object');
26+
expect((r as any)._agentosUseToolForStructuredOutput).toBe(true);
27+
expect((r as any).tool.name).toBe('X');
28+
expect(typeof (r as any).tool.input_schema).toBe('object');
3029
});
3130

3231
it('gemini returns json_object with _gemini.responseSchema populated', () => {
3332
const r = buildResponseFormat({ provider: 'gemini', schema, schemaName: 'X' });
34-
assert.equal((r as any).type, 'json_object');
35-
assert.equal(typeof (r as any)._gemini.responseSchema, 'object');
33+
expect((r as any).type).toBe('json_object');
34+
expect(typeof (r as any)._gemini.responseSchema).toBe('object');
3635
});
3736

3837
it('gemini-cli is treated like gemini', () => {
3938
const r = buildResponseFormat({ provider: 'gemini-cli', schema, schemaName: 'X' });
40-
assert.equal((r as any).type, 'json_object');
41-
assert.equal(typeof (r as any)._gemini.responseSchema, 'object');
39+
expect((r as any).type).toBe('json_object');
40+
expect(typeof (r as any)._gemini.responseSchema).toBe('object');
4241
});
4342

4443
it('openrouter degrades to bare json_object (no enforcement available)', () => {
4544
const r = buildResponseFormat({ provider: 'openrouter', schema, schemaName: 'X' });
46-
assert.deepEqual(r, { type: 'json_object' });
45+
expect(r).toEqual({ type: 'json_object' });
4746
});
4847

4948
it('unknown provider degrades to bare json_object', () => {
5049
const r = buildResponseFormat({ provider: 'fictional', schema, schemaName: 'X' });
51-
assert.deepEqual(r, { type: 'json_object' });
50+
expect(r).toEqual({ type: 'json_object' });
5251
});
5352

5453
it('schemaName: replaces non-word chars with underscore', () => {
5554
const r = buildResponseFormat({ provider: 'openai', schema, schemaName: 'a.b/c d!' });
56-
assert.equal((r as any).json_schema.name, 'a_b_c_d_');
55+
expect((r as any).json_schema.name).toBe('a_b_c_d_');
5756
});
5857

5958
it('schemaName: truncates to 64 chars', () => {
6059
const long = 'a'.repeat(80);
6160
const r = buildResponseFormat({ provider: 'openai', schema, schemaName: long });
62-
assert.equal(((r as any).json_schema.name as string).length, 64);
61+
expect(((r as any).json_schema.name as string).length).toBe(64);
6362
});
6463

6564
it('schemaName: empty after sanitization falls back to "response"', () => {
6665
const r = buildResponseFormat({ provider: 'openai', schema, schemaName: '!!!' });
67-
assert.equal((r as any).json_schema.name, 'response');
66+
expect((r as any).json_schema.name).toBe('response');
6867
});
6968
});

src/core/llm/providers/implementations/AnthropicProvider.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -875,11 +875,15 @@ export class AnthropicProvider implements IProvider {
875875
| { _agentosUseToolForStructuredOutput?: boolean; tool?: { name: string; input_schema: Record<string, unknown> } }
876876
| undefined;
877877
if (sf?._agentosUseToolForStructuredOutput && sf.tool) {
878-
const existingTools = (payload.tools as Array<Record<string, unknown>>) ?? [];
879-
payload.tools = [
880-
{ name: sf.tool.name, input_schema: sf.tool.input_schema },
881-
...existingTools,
882-
];
878+
// Schema-aware mode reserves the tool slot for the schema tool;
879+
// mixing structured output with caller-provided tools requires a
880+
// multi-turn protocol the session.send overload doesn't speak.
881+
// The caller path (AgentSession.send) already strips its tools
882+
// before reaching us; this drop is the second line of defense
883+
// against a direct provider.generateCompletion call that
884+
// accidentally passes both responseFormat (structured-output
885+
// marker) and a tools array.
886+
payload.tools = [{ name: sf.tool.name, input_schema: sf.tool.input_schema }];
883887
payload.tool_choice = { type: 'tool', name: sf.tool.name };
884888
}
885889

src/memory/retrieval/store/migrations/MigrationRunner.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,17 @@ export class MigrationRunner {
171171
table: string,
172172
column: string,
173173
): Promise<boolean> {
174+
// Scope to current_schema() to match _postgresTableExists. Without
175+
// the schema filter, a table with the same name in another schema
176+
// (e.g. a previous test deployment in another search-path entry)
177+
// would falsely return true and skip the actual ALTER TABLE this
178+
// probe gates.
174179
const row = await adapter.get<{ exists: boolean }>(
175180
`SELECT EXISTS (
176181
SELECT 1 FROM information_schema.columns
177-
WHERE table_name = $1 AND column_name = $2
182+
WHERE table_schema = current_schema()
183+
AND table_name = $1
184+
AND column_name = $2
178185
) AS exists`,
179186
[table, column],
180187
);

src/memory/retrieval/store/postgresPasswordRedaction.ts

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,39 @@
1515
* - Connection strings without an embedded password pass through unchanged.
1616
*/
1717
export function redactPostgresPassword(connStr: string): string {
18+
let safe = connStr;
19+
1820
// URL form: postgresql://user:password@host/db
19-
let safe = connStr.replace(/(:\/\/[^:]+:)[^@]+(@)/, '$1***$2');
20-
// Quoted keyword form: password='...' or password="..."
21-
safe = safe.replace(/(password\s*=\s*)'[^']*'/gi, "$1'***'");
22-
safe = safe.replace(/(password\s*=\s*)"[^"]*"/gi, '$1"***"');
21+
// Use the URL parser so passwords containing '@' are handled correctly
22+
// (the regex approach split at the wrong '@' for passwords like
23+
// 'p@ss@word'). Falls back to the regex for non-URL inputs (keyword
24+
// form below) since `new URL` rejects them.
25+
if (/^[a-z][a-z0-9+.-]*:\/\//i.test(connStr)) {
26+
try {
27+
const url = new URL(connStr);
28+
if (url.password) {
29+
url.password = '***';
30+
safe = url.toString();
31+
}
32+
} catch {
33+
// Malformed URL — skip URL path; the keyword regexes below are no-ops
34+
// for URLs and the original string passes through.
35+
}
36+
}
37+
38+
// Quoted keyword form. Inner pattern admits doubled-quote (Postgres
39+
// libpq style) and backslash escapes inside quoted values so the
40+
// matcher doesn't terminate early on a literal escaped quote in the
41+
// password (e.g. password='a''b' or password='a\'b'), which would
42+
// otherwise leak the trailing fragment past the supposed closer.
43+
safe = safe.replace(
44+
/(password\s*=\s*)'(?:''|\\'|[^'])*'/gi,
45+
"$1'***'",
46+
);
47+
safe = safe.replace(
48+
/(password\s*=\s*)"(?:""|\\"|[^"])*"/gi,
49+
'$1"***"',
50+
);
2351
// Bare keyword form: password=token (whitespace- or end-terminated)
2452
safe = safe.replace(/(password\s*=\s*)[^\s'"]+/gi, '$1***');
2553
return safe;

0 commit comments

Comments
 (0)