|
| 1 | +/** |
| 2 | + * @file planning-integration.test.ts |
| 3 | + * @description Tests for the plan-then-execute integration in generateText. |
| 4 | + * |
| 5 | + * Validates three behaviours: |
| 6 | + * 1. Planning disabled (default) — no planning call is made. |
| 7 | + * 2. Planning enabled (`planning: true`) — an upfront planning call is made, |
| 8 | + * the plan is injected into the system prompt, and the tool loop proceeds. |
| 9 | + * 3. Planning with custom config — custom temperature, maxTokens, and system |
| 10 | + * prompt are forwarded to the planning call. |
| 11 | + */ |
| 12 | + |
| 13 | +import { describe, expect, it, vi, beforeEach } from 'vitest'; |
| 14 | + |
| 15 | +// --------------------------------------------------------------------------- |
| 16 | +// Mock setup — mirrors the pattern used by generateText.test.ts |
| 17 | +// --------------------------------------------------------------------------- |
| 18 | + |
| 19 | +const hoisted = vi.hoisted(() => { |
| 20 | + const generateCompletion = vi.fn(); |
| 21 | + const getProvider = vi.fn(() => ({ generateCompletion })); |
| 22 | + const createProviderManager = vi.fn(async () => ({ getProvider })); |
| 23 | + return { |
| 24 | + generateCompletion, |
| 25 | + getProvider, |
| 26 | + createProviderManager, |
| 27 | + }; |
| 28 | +}); |
| 29 | + |
| 30 | +vi.mock('../model.js', () => ({ |
| 31 | + resolveModelOption: vi.fn(() => ({ providerId: 'openai', modelId: 'gpt-4.1-mini' })), |
| 32 | + resolveProvider: vi.fn(() => ({ |
| 33 | + providerId: 'openai', |
| 34 | + modelId: 'gpt-4.1-mini', |
| 35 | + apiKey: 'test-key', |
| 36 | + })), |
| 37 | + createProviderManager: hoisted.createProviderManager, |
| 38 | +})); |
| 39 | + |
| 40 | +import { generateText } from '../generateText.js'; |
| 41 | + |
| 42 | +// --------------------------------------------------------------------------- |
| 43 | +// Helpers |
| 44 | +// --------------------------------------------------------------------------- |
| 45 | + |
| 46 | +/** |
| 47 | + * Returns a mock LLM response that mimics a successful plan generation. |
| 48 | + * The model returns a JSON plan with the given steps. |
| 49 | + */ |
| 50 | +function makePlanResponse(steps: Array<{ description: string; tool: string | null; reasoning: string }>) { |
| 51 | + return { |
| 52 | + modelId: 'gpt-4.1-mini', |
| 53 | + usage: { promptTokens: 20, completionTokens: 30, totalTokens: 50 }, |
| 54 | + choices: [ |
| 55 | + { |
| 56 | + message: { |
| 57 | + role: 'assistant', |
| 58 | + content: JSON.stringify({ steps }), |
| 59 | + }, |
| 60 | + finishReason: 'stop', |
| 61 | + }, |
| 62 | + ], |
| 63 | + }; |
| 64 | +} |
| 65 | + |
| 66 | +/** |
| 67 | + * Returns a mock LLM response with a text-only reply (no tool calls). |
| 68 | + */ |
| 69 | +function makeTextResponse(text: string) { |
| 70 | + return { |
| 71 | + modelId: 'gpt-4.1-mini', |
| 72 | + usage: { promptTokens: 10, completionTokens: 5, totalTokens: 15 }, |
| 73 | + choices: [ |
| 74 | + { |
| 75 | + message: { role: 'assistant', content: text }, |
| 76 | + finishReason: 'stop', |
| 77 | + }, |
| 78 | + ], |
| 79 | + }; |
| 80 | +} |
| 81 | + |
| 82 | +/** |
| 83 | + * Returns a mock LLM response that requests a tool call. |
| 84 | + */ |
| 85 | +function makeToolCallResponse(toolName: string, args: Record<string, unknown>) { |
| 86 | + return { |
| 87 | + modelId: 'gpt-4.1-mini', |
| 88 | + usage: { promptTokens: 10, completionTokens: 8, totalTokens: 18 }, |
| 89 | + choices: [ |
| 90 | + { |
| 91 | + message: { |
| 92 | + role: 'assistant', |
| 93 | + content: null, |
| 94 | + tool_calls: [ |
| 95 | + { |
| 96 | + id: 'tc-1', |
| 97 | + type: 'function', |
| 98 | + function: { |
| 99 | + name: toolName, |
| 100 | + arguments: JSON.stringify(args), |
| 101 | + }, |
| 102 | + }, |
| 103 | + ], |
| 104 | + }, |
| 105 | + finishReason: 'tool_calls', |
| 106 | + }, |
| 107 | + ], |
| 108 | + }; |
| 109 | +} |
| 110 | + |
| 111 | +// --------------------------------------------------------------------------- |
| 112 | +// Tests |
| 113 | +// --------------------------------------------------------------------------- |
| 114 | + |
describe('generateText — planning integration', () => {
  beforeEach(() => {
    // mockReset clears both recorded calls and any responses still
    // queued via mockResolvedValueOnce from the previous test.
    hoisted.generateCompletion.mockReset();
  });

  // -----------------------------------------------------------------------
  // 1. Planning disabled — no planning call
  // -----------------------------------------------------------------------
  it('does NOT make a planning call when planning is disabled (default)', async () => {
    hoisted.generateCompletion.mockResolvedValueOnce(
      makeTextResponse('Just a normal reply.'),
    );

    const result = await generateText({
      model: 'openai:gpt-4.1-mini',
      prompt: 'Say hello.',
    });

    // Only one call: the normal generation call — no planning call.
    expect(hoisted.generateCompletion).toHaveBeenCalledTimes(1);
    expect(result.text).toBe('Just a normal reply.');
    expect(result.plan).toBeUndefined();
  });

  // -----------------------------------------------------------------------
  // 2. Planning enabled — plan call + tool loop
  // -----------------------------------------------------------------------
  it('makes a planning call then executes the tool loop when planning is enabled', async () => {
    const planSteps = [
      { description: 'Look up the weather', tool: 'get_weather', reasoning: 'Need current data' },
      { description: 'Summarise findings', tool: null, reasoning: 'Compose final answer' },
    ];

    // Queued responses are consumed in order, so this sequence encodes
    // the expected call order: plan → tool-call request → final answer.
    // Call 1: planning call returns a plan
    hoisted.generateCompletion.mockResolvedValueOnce(makePlanResponse(planSteps));
    // Call 2: tool loop — model requests a tool call
    hoisted.generateCompletion.mockResolvedValueOnce(
      makeToolCallResponse('get_weather', { city: 'London' }),
    );
    // Call 3: tool loop — model provides final answer after tool result
    hoisted.generateCompletion.mockResolvedValueOnce(
      makeTextResponse('The weather in London is sunny.'),
    );

    const mockTool = {
      name: 'get_weather',
      description: 'Get weather for a city',
      inputSchema: { type: 'object', properties: { city: { type: 'string' } } },
      execute: vi.fn(async () => ({ success: true, output: { temp: 22, condition: 'sunny' } })),
    };

    const result = await generateText({
      model: 'openai:gpt-4.1-mini',
      prompt: 'What is the weather in London?',
      planning: true,
      maxSteps: 5,
      // NOTE(review): `as any` sidesteps the tool type; if the real Tool
      // interface drifts from this shape the cast will hide it — consider
      // a typed test-tool helper.
      tools: { get_weather: mockTool } as any,
    });

    // 3 calls total: 1 planning + 2 tool loop steps
    expect(hoisted.generateCompletion).toHaveBeenCalledTimes(3);

    // The planning call should have a system prompt about planning.
    // NOTE(review): assumes generateCompletion(model, messages, options)
    // — index [1] being the messages array is inferred from usage here;
    // confirm against the provider interface.
    const planningCallMessages = hoisted.generateCompletion.mock.calls[0][1];
    expect(planningCallMessages[0].content).toContain('planning');

    // The second call (first tool loop step) should include the plan
    const toolLoopCallMessages = hoisted.generateCompletion.mock.calls[1][1];
    const systemMessages = toolLoopCallMessages.filter(
      (m: any) => m.role === 'system',
    );
    // The injected plan message is identified by its 'Follow this plan'
    // preamble and should render each step, tagging tool steps.
    const planSystemMessage = systemMessages.find((m: any) =>
      String(m.content).includes('Follow this plan'),
    );
    expect(planSystemMessage).toBeDefined();
    expect(planSystemMessage.content).toContain('Look up the weather');
    expect(planSystemMessage.content).toContain('[tool: get_weather]');

    // Result includes the plan
    expect(result.plan).toBeDefined();
    expect(result.plan!.steps).toHaveLength(2);
    expect(result.plan!.steps[0].description).toBe('Look up the weather');
    expect(result.plan!.steps[0].tool).toBe('get_weather');
    expect(result.plan!.steps[1].tool).toBeNull();

    // Tool was actually called
    expect(mockTool.execute).toHaveBeenCalledTimes(1);
    expect(result.text).toBe('The weather in London is sunny.');

    // Usage includes both planning and tool loop tokens
    // (plan 20/30 + tool-call step 10/8 + final step 10/5 — see helpers).
    expect(result.usage.promptTokens).toBe(20 + 10 + 10);
    expect(result.usage.completionTokens).toBe(30 + 8 + 5);
  });

  // -----------------------------------------------------------------------
  // 3. Planning with custom config
  // -----------------------------------------------------------------------
  it('forwards custom PlanningConfig to the planning call', async () => {
    const customSystemPrompt = 'You are a meticulous planner. Output JSON only.';
    const planSteps = [
      { description: 'Analyse input', tool: null, reasoning: 'Understand the request' },
    ];

    // Call 1: planning call
    hoisted.generateCompletion.mockResolvedValueOnce(makePlanResponse(planSteps));
    // Call 2: final answer
    hoisted.generateCompletion.mockResolvedValueOnce(
      makeTextResponse('Analysis complete.'),
    );

    const result = await generateText({
      model: 'openai:gpt-4.1-mini',
      prompt: 'Analyse this data.',
      planning: {
        systemPrompt: customSystemPrompt,
        temperature: 0.1,
        maxTokens: 512,
      },
    });

    expect(hoisted.generateCompletion).toHaveBeenCalledTimes(2);

    // Verify planning call used custom config: the custom system prompt
    // must replace the default planning prompt verbatim.
    const planningCallMessages = hoisted.generateCompletion.mock.calls[0][1];
    expect(planningCallMessages[0].content).toBe(customSystemPrompt);

    // NOTE(review): assumes the request options object is the third
    // positional argument of generateCompletion — confirm.
    const planningCallOptions = hoisted.generateCompletion.mock.calls[0][2];
    expect(planningCallOptions.temperature).toBe(0.1);
    expect(planningCallOptions.maxTokens).toBe(512);

    // Plan is present in result
    expect(result.plan).toBeDefined();
    expect(result.plan!.steps).toHaveLength(1);
    expect(result.plan!.steps[0].description).toBe('Analyse input');
    expect(result.text).toBe('Analysis complete.');
  });
});
0 commit comments