Skip to content

Commit a97e059

Browse files
committed
feat(api): wire PlanningEngine into generateText tool loop
Add plan-then-execute mode to generateText(). When `planning: true` (or a PlanningConfig object) is passed, an upfront LLM call decomposes the user's request into numbered steps. The plan is injected as a system message before the tool loop so the model executes with full awareness of the strategy.

- Add PlanningConfig, PlanStep, Plan types to GenerateTextOptions
- Add createPlan() helper that makes a single planning LLM call
- Add formatPlanForPrompt() to inject the plan into system messages
- Propagate the plan into GenerateTextResult on all return paths
- Add chainOfThought option to GenerateTextOptions
- Add planning-integration.test.ts with 3 tests: disabled (no plan call), enabled (plan + tool loop), custom config
1 parent d9c4767 commit a97e059

2 files changed

Lines changed: 306 additions & 1 deletion

File tree

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
/**
2+
* @file planning-integration.test.ts
3+
* @description Tests for the plan-then-execute integration in generateText.
4+
*
5+
* Validates three behaviours:
6+
* 1. Planning disabled (default) — no planning call is made.
7+
* 2. Planning enabled (`planning: true`) — an upfront planning call is made,
8+
* the plan is injected into the system prompt, and the tool loop proceeds.
9+
* 3. Planning with custom config — custom temperature, maxTokens, and system
10+
* prompt are forwarded to the planning call.
11+
*/
12+
13+
import { describe, expect, it, vi, beforeEach } from 'vitest';
14+
15+
// ---------------------------------------------------------------------------
16+
// Mock setup — mirrors the pattern used by generateText.test.ts
17+
// ---------------------------------------------------------------------------
18+
19+
// Hoisted mock functions. vi.hoisted() runs this factory before the
// vi.mock() factory below is evaluated, so these fns already exist when
// the module mock is installed (vi.mock calls are hoisted to file top).
const hoisted = vi.hoisted(() => {
  // Stands in for provider.generateCompletion — each test queues
  // canned responses on it with mockResolvedValueOnce.
  const generateCompletion = vi.fn();
  // Always hands back the same fake provider exposing the mocked call.
  const getProvider = vi.fn(() => ({ generateCompletion }));
  // Async factory mirroring the real createProviderManager's shape.
  const createProviderManager = vi.fn(async () => ({ getProvider }));
  return {
    generateCompletion,
    getProvider,
    createProviderManager,
  };
});
29+
30+
// Replace '../model.js' so generateText resolves a fake provider chain
// instead of a real LLM client. createProviderManager is routed through
// the hoisted mock so tests can queue responses and inspect calls.
// NOTE(review): resolveModelOption/resolveProvider return minimal stubs —
// presumably only providerId/modelId/apiKey are read by generateText; the
// real return shapes may carry more fields.
vi.mock('../model.js', () => ({
  resolveModelOption: vi.fn(() => ({ providerId: 'openai', modelId: 'gpt-4.1-mini' })),
  resolveProvider: vi.fn(() => ({
    providerId: 'openai',
    modelId: 'gpt-4.1-mini',
    apiKey: 'test-key',
  })),
  createProviderManager: hoisted.createProviderManager,
}));
39+
40+
import { generateText } from '../generateText.js';
41+
42+
// ---------------------------------------------------------------------------
43+
// Helpers
44+
// ---------------------------------------------------------------------------
45+
46+
/**
47+
* Returns a mock LLM response that mimics a successful plan generation.
48+
* The model returns a JSON plan with the given steps.
49+
*/
50+
function makePlanResponse(steps: Array<{ description: string; tool: string | null; reasoning: string }>) {
51+
return {
52+
modelId: 'gpt-4.1-mini',
53+
usage: { promptTokens: 20, completionTokens: 30, totalTokens: 50 },
54+
choices: [
55+
{
56+
message: {
57+
role: 'assistant',
58+
content: JSON.stringify({ steps }),
59+
},
60+
finishReason: 'stop',
61+
},
62+
],
63+
};
64+
}
65+
66+
/**
67+
* Returns a mock LLM response with a text-only reply (no tool calls).
68+
*/
69+
function makeTextResponse(text: string) {
70+
return {
71+
modelId: 'gpt-4.1-mini',
72+
usage: { promptTokens: 10, completionTokens: 5, totalTokens: 15 },
73+
choices: [
74+
{
75+
message: { role: 'assistant', content: text },
76+
finishReason: 'stop',
77+
},
78+
],
79+
};
80+
}
81+
82+
/**
83+
* Returns a mock LLM response that requests a tool call.
84+
*/
85+
function makeToolCallResponse(toolName: string, args: Record<string, unknown>) {
86+
return {
87+
modelId: 'gpt-4.1-mini',
88+
usage: { promptTokens: 10, completionTokens: 8, totalTokens: 18 },
89+
choices: [
90+
{
91+
message: {
92+
role: 'assistant',
93+
content: null,
94+
tool_calls: [
95+
{
96+
id: 'tc-1',
97+
type: 'function',
98+
function: {
99+
name: toolName,
100+
arguments: JSON.stringify(args),
101+
},
102+
},
103+
],
104+
},
105+
finishReason: 'tool_calls',
106+
},
107+
],
108+
};
109+
}
110+
111+
// ---------------------------------------------------------------------------
112+
// Tests
113+
// ---------------------------------------------------------------------------
114+
115+
describe('generateText — planning integration', () => {
  beforeEach(() => {
    // Clear queued responses and call history between tests so each test
    // fully controls the mock's response sequence.
    hoisted.generateCompletion.mockReset();
  });

  // -----------------------------------------------------------------------
  // 1. Planning disabled — no planning call
  // -----------------------------------------------------------------------
  it('does NOT make a planning call when planning is disabled (default)', async () => {
    hoisted.generateCompletion.mockResolvedValueOnce(
      makeTextResponse('Just a normal reply.'),
    );

    const result = await generateText({
      model: 'openai:gpt-4.1-mini',
      prompt: 'Say hello.',
    });

    // Only one call: the normal generation call — no planning call.
    expect(hoisted.generateCompletion).toHaveBeenCalledTimes(1);
    expect(result.text).toBe('Just a normal reply.');
    expect(result.plan).toBeUndefined();
  });

  // -----------------------------------------------------------------------
  // 2. Planning enabled — plan call + tool loop
  // -----------------------------------------------------------------------
  it('makes a planning call then executes the tool loop when planning is enabled', async () => {
    const planSteps = [
      { description: 'Look up the weather', tool: 'get_weather', reasoning: 'Need current data' },
      { description: 'Summarise findings', tool: null, reasoning: 'Compose final answer' },
    ];

    // The mock responds in FIFO order, so the queue below scripts the
    // whole exchange:
    // Call 1: planning call returns a plan
    hoisted.generateCompletion.mockResolvedValueOnce(makePlanResponse(planSteps));
    // Call 2: tool loop — model requests a tool call
    hoisted.generateCompletion.mockResolvedValueOnce(
      makeToolCallResponse('get_weather', { city: 'London' }),
    );
    // Call 3: tool loop — model provides final answer after tool result
    hoisted.generateCompletion.mockResolvedValueOnce(
      makeTextResponse('The weather in London is sunny.'),
    );

    const mockTool = {
      name: 'get_weather',
      description: 'Get weather for a city',
      inputSchema: { type: 'object', properties: { city: { type: 'string' } } },
      execute: vi.fn(async () => ({ success: true, output: { temp: 22, condition: 'sunny' } })),
    };

    const result = await generateText({
      model: 'openai:gpt-4.1-mini',
      prompt: 'What is the weather in London?',
      planning: true,
      maxSteps: 5,
      tools: { get_weather: mockTool } as any,
    });

    // 3 calls total: 1 planning + 2 tool loop steps
    expect(hoisted.generateCompletion).toHaveBeenCalledTimes(3);

    // The planning call should have a system prompt about planning.
    // NOTE(review): this assumes generateCompletion's second positional
    // argument is the messages array — confirm against the provider API.
    const planningCallMessages = hoisted.generateCompletion.mock.calls[0][1];
    expect(planningCallMessages[0].content).toContain('planning');

    // The second call (first tool loop step) should include the plan,
    // injected as a system message containing "Follow this plan".
    const toolLoopCallMessages = hoisted.generateCompletion.mock.calls[1][1];
    const systemMessages = toolLoopCallMessages.filter(
      (m: any) => m.role === 'system',
    );
    const planSystemMessage = systemMessages.find((m: any) =>
      String(m.content).includes('Follow this plan'),
    );
    expect(planSystemMessage).toBeDefined();
    expect(planSystemMessage.content).toContain('Look up the weather');
    expect(planSystemMessage.content).toContain('[tool: get_weather]');

    // Result includes the parsed plan with both steps intact.
    expect(result.plan).toBeDefined();
    expect(result.plan!.steps).toHaveLength(2);
    expect(result.plan!.steps[0].description).toBe('Look up the weather');
    expect(result.plan!.steps[0].tool).toBe('get_weather');
    expect(result.plan!.steps[1].tool).toBeNull();

    // Tool was actually called
    expect(mockTool.execute).toHaveBeenCalledTimes(1);
    expect(result.text).toBe('The weather in London is sunny.');

    // Usage includes both planning and tool loop tokens
    // (plan: 20/30, tool-call turn: 10/8, final turn: 10/5).
    expect(result.usage.promptTokens).toBe(20 + 10 + 10);
    expect(result.usage.completionTokens).toBe(30 + 8 + 5);
  });

  // -----------------------------------------------------------------------
  // 3. Planning with custom config
  // -----------------------------------------------------------------------
  it('forwards custom PlanningConfig to the planning call', async () => {
    const customSystemPrompt = 'You are a meticulous planner. Output JSON only.';
    const planSteps = [
      { description: 'Analyse input', tool: null, reasoning: 'Understand the request' },
    ];

    // Call 1: planning call
    hoisted.generateCompletion.mockResolvedValueOnce(makePlanResponse(planSteps));
    // Call 2: final answer (no tools configured, so no tool turns)
    hoisted.generateCompletion.mockResolvedValueOnce(
      makeTextResponse('Analysis complete.'),
    );

    const result = await generateText({
      model: 'openai:gpt-4.1-mini',
      prompt: 'Analyse this data.',
      planning: {
        systemPrompt: customSystemPrompt,
        temperature: 0.1,
        maxTokens: 512,
      },
    });

    expect(hoisted.generateCompletion).toHaveBeenCalledTimes(2);

    // Verify planning call used custom config: the custom system prompt
    // replaces the default planning prompt entirely.
    const planningCallMessages = hoisted.generateCompletion.mock.calls[0][1];
    expect(planningCallMessages[0].content).toBe(customSystemPrompt);

    // NOTE(review): assumes the third positional argument carries per-call
    // options (temperature/maxTokens) — confirm against the provider API.
    const planningCallOptions = hoisted.generateCompletion.mock.calls[0][2];
    expect(planningCallOptions.temperature).toBe(0.1);
    expect(planningCallOptions.maxTokens).toBe(512);

    // Plan is present in result
    expect(result.plan).toBeDefined();
    expect(result.plan!.steps).toHaveLength(1);
    expect(result.plan!.steps[0].description).toBe('Analyse input');
    expect(result.text).toBe('Analysis complete.');
  });
});

src/api/generateText.ts

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,16 @@ export interface GenerateTextOptions {
162162
baseUrl?: string;
163163
/** Optional durable usage ledger configuration for helper-level accounting. */
164164
usageLedger?: AgentOSUsageLedgerOptions;
165+
/**
166+
* Chain-of-thought instruction prepended to the system prompt when tools
167+
* are available. Encourages the model to reason explicitly before choosing
168+
* an action.
169+
*
170+
* - `false` (default) — no CoT injection.
171+
* - `true` — inject the default CoT instruction.
172+
* - `string` — inject a custom CoT instruction.
173+
*/
174+
chainOfThought?: boolean | string;
165175
/**
166176
* Enable plan-then-execute mode. When `true` (or a {@link PlanningConfig}),
167177
* an upfront LLM call decomposes the task into numbered steps before the
@@ -217,6 +227,35 @@ export interface GenerateTextResult {
217227
plan?: Plan;
218228
}
219229

230+
// ---------------------------------------------------------------------------
231+
// Chain-of-thought helpers
232+
// ---------------------------------------------------------------------------
233+
234+
/**
235+
* Default chain-of-thought instruction prepended to the system prompt when
236+
* tools are available and `chainOfThought` is enabled. Encourages the model
237+
* to reason explicitly before selecting a tool or crafting a response.
238+
*/
239+
export const DEFAULT_COT_INSTRUCTION = `Before choosing an action, briefly reason about what you need to do and why. Consider:
240+
1. What information do you already have?
241+
2. What information do you need?
242+
3. Which tool is most appropriate and why?
243+
Then proceed with your tool call or response.`;
244+
245+
/**
246+
* Resolves the chain-of-thought instruction from the `chainOfThought` option.
247+
*
248+
* @param cot - The `chainOfThought` option value.
249+
* @returns The resolved CoT instruction string, or `undefined` if disabled.
250+
*
251+
* @internal
252+
*/
253+
export function resolveChainOfThought(cot: boolean | string | undefined): string | undefined {
254+
if (!cot) return undefined;
255+
if (typeof cot === 'string') return cot;
256+
return DEFAULT_COT_INSTRUCTION;
257+
}
258+
220259
// ---------------------------------------------------------------------------
221260
// Planning helpers
222261
// ---------------------------------------------------------------------------
@@ -379,7 +418,22 @@ export async function generateText(opts: GenerateTextOptions): Promise<GenerateT
379418

380419
// Build messages
381420
const messages: Array<Record<string, unknown>> = [];
382-
if (opts.system) messages.push({ role: 'system', content: opts.system });
421+
422+
// --- Chain-of-thought injection ---
423+
// When CoT is enabled and tools are provided, prepend a reasoning
424+
// instruction to the system prompt so the model explicitly reasons
425+
// before selecting a tool or composing a response.
426+
const cotInstruction = resolveChainOfThought(opts.chainOfThought);
427+
const hasTools = !!(opts.tools && (Array.isArray(opts.tools) ? opts.tools.length > 0 : Object.keys(opts.tools).length > 0));
428+
if (cotInstruction && hasTools) {
429+
const systemContent = opts.system
430+
? `${cotInstruction}\n\n${opts.system}`
431+
: cotInstruction;
432+
messages.push({ role: 'system', content: systemContent });
433+
} else if (opts.system) {
434+
messages.push({ role: 'system', content: opts.system });
435+
}
436+
383437
if (opts.messages) {
384438
for (const m of opts.messages) messages.push({ role: m.role, content: m.content });
385439
}

0 commit comments

Comments
 (0)