Skip to content

Commit e900d04

Browse files
committed
feat: add MissionPlanner — Tree of Thought decomposition with evaluation and refinement
1 parent c57a5aa commit e900d04

2 files changed

Lines changed: 597 additions & 0 deletions

File tree

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
import { describe, it, expect, vi } from 'vitest';
2+
import { MissionPlanner } from '../planning/MissionPlanner.js';
3+
import type { PlannerConfig } from '../planning/types.js';
4+
import { DEFAULT_THRESHOLDS } from '../planning/types.js';
5+
6+
/**
 * Builds a scripted stand-in for the planner's LLM caller.
 *
 * The returned `vi.fn` mock ignores both prompt arguments and resolves with
 * the next entry of `responses` on every invocation, in call order. Once the
 * script is exhausted it keeps resolving with an empty JSON object (`'{}'`).
 *
 * @param responses Raw response strings to hand out, one per call.
 * @returns An async vitest mock function `(system, user) => Promise<string>`.
 */
function createMockLlmCaller(responses: readonly string[]) {
  // Position of the next scripted response; advanced on every call, even past the end.
  let callIndex = 0;
  return vi.fn(async (_system: string, _user: string) => {
    const response = responses[callIndex] ?? '{}';
    callIndex++;
    return response;
  });
}
14+
15+
/**
 * Produces the planner configuration shared by every test in this file.
 *
 * All limits are fixed test values; only the LLM caller varies per test.
 * `thresholds` is a shallow copy of `DEFAULT_THRESHOLDS`, so each config gets
 * its own object rather than sharing the imported constant.
 *
 * @param llmCaller The (mock) LLM caller to inject into the config.
 * @returns A complete `PlannerConfig`.
 */
const basePlannerConfig = (llmCaller: PlannerConfig['llmCaller']): PlannerConfig => ({
  branchCount: 3,
  autonomy: 'guardrailed',
  providerStrategy: { strategy: 'balanced' },
  thresholds: { ...DEFAULT_THRESHOLDS },
  costCap: 10.0,
  maxAgents: 10,
  maxToolForges: 5,
  maxExpansions: 8,
  maxDepth: 3,
  reevalInterval: 3,
  llmCaller,
});
28+
29+
/**
 * Serialises a canned branch-generation reply.
 *
 * The payload describes a single `researcher` node wired statically from
 * `__START__` to `__END__`, with fixed cost and latency estimates. Only the
 * `strategy` and `summary` fields vary.
 *
 * @param strategy Value for the payload's `strategy` field.
 * @param summary Value for the payload's `summary` field.
 * @returns The payload as a JSON string.
 */
const makeBranchResponse = (strategy: string, summary: string): string =>
  JSON.stringify({
    strategy,
    summary,
    nodes: [
      {
        id: 'researcher',
        type: 'gmi',
        role: 'Researcher',
        executorConfig: { type: 'gmi', instructions: 'Research the topic' },
        complexity: 0.7,
        estimatedTokens: 2000,
      },
    ],
    edges: [
      { source: '__START__', target: 'researcher', type: 'static' },
      { source: 'researcher', target: '__END__', type: 'static' },
    ],
    estimatedCost: 1.0,
    estimatedLatencyMs: 60000,
  });
50+
51+
/**
 * Serialises a canned branch-evaluation reply.
 *
 * Each input branch becomes one evaluation entry. The per-dimension scores are
 * fixed; only `overall` is taken from the input. The `reasoning` text embeds
 * the branch id.
 *
 * @param selectedBranchId Branch id placed in `recommendation.selectedBranchId`.
 * @param branches Branch ids with the `overall` score to report for each.
 * @returns The payload as a JSON string.
 */
const makeEvalResponse = (
  selectedBranchId: string,
  branches: ReadonlyArray<{ branchId: string; overall: number }>,
): string =>
  JSON.stringify({
    evaluations: branches.map((b) => ({
      branchId: b.branchId,
      scores: {
        feasibility: 0.9,
        costEfficiency: 0.7,
        latency: 0.5,
        robustness: 0.6,
        overall: b.overall,
      },
      reasoning: `Score for ${b.branchId}`,
    })),
    recommendation: { selectedBranchId, reason: 'Best overall score' },
  });
66+
67+
/**
 * Serialises a canned refinement reply that proposes no changes.
 *
 * `refinements` and `toolGaps` are always empty; only the final estimates vary.
 *
 * @param cost Value for `finalEstimatedCost` (defaults to 1.0).
 * @param latency Value for `finalEstimatedLatencyMs` (defaults to 60000).
 * @returns The payload as a JSON string.
 */
const makeRefineResponse = (cost = 1.0, latency = 60000): string =>
  JSON.stringify({
    refinements: [],
    toolGaps: [],
    finalEstimatedCost: cost,
    finalEstimatedLatencyMs: latency,
  });
74+
75+
/**
 * End-to-end specs for `MissionPlanner.plan` driven by a scripted LLM caller.
 *
 * Every test scripts five replies in call order: three branch drafts (matching
 * `branchCount: 3` in the base config), one evaluation, and one refinement.
 */
describe('MissionPlanner', () => {
  describe('Phase 1: Divergent Exploration', () => {
    it('generates N candidate branches', async () => {
      const branch = makeBranchResponse('linear', 'Sequential pipeline');
      const llmCaller = createMockLlmCaller([
        branch,
        branch,
        branch,
        makeEvalResponse('branch_0', [
          { branchId: 'branch_0', overall: 0.75 },
          { branchId: 'branch_1', overall: 0.6 },
          { branchId: 'branch_2', overall: 0.5 },
        ]),
        makeRefineResponse(),
      ]);

      const planner = new MissionPlanner(basePlannerConfig(llmCaller));
      const result = await planner.plan('Research AI papers', { tools: [], providers: ['openai'] });

      expect(result.allBranches).toHaveLength(3);
      expect(result.selectedBranch).toBeDefined();
      expect(result.compiledGraph).toBeDefined();
      expect(result.compiledGraph.nodes.length).toBeGreaterThan(0);
    });

    it('survives partial branch failures', async () => {
      const branch = makeBranchResponse('linear', 'Sequential');
      const llmCaller = createMockLlmCaller([
        branch,
        'INVALID JSON !!!', // branch 1 fails
        branch,
        makeEvalResponse('branch_0', [
          { branchId: 'branch_0', overall: 0.75 },
          { branchId: 'branch_2', overall: 0.5 },
        ]),
        makeRefineResponse(),
      ]);

      const planner = new MissionPlanner(basePlannerConfig(llmCaller));
      const result = await planner.plan('Test goal', { tools: [], providers: ['openai'] });

      // One of the three scripted branch replies is unparseable, so 2 surviving
      // branches are expected; the bounds below are deliberately loose and only
      // require that planning completed with between 1 and 3 branches.
      expect(result.allBranches.length).toBeLessThanOrEqual(3);
      expect(result.allBranches.length).toBeGreaterThanOrEqual(1);
    });
  });

  describe('Phase 2: Evaluation', () => {
    it('scores branches and selects the best one', async () => {
      const llmCaller = createMockLlmCaller([
        makeBranchResponse('linear', 'Linear approach'),
        makeBranchResponse('parallel', 'Parallel approach'),
        makeBranchResponse('hierarchical', 'Hierarchical approach'),
        makeEvalResponse('branch_1', [
          { branchId: 'branch_0', overall: 0.63 },
          { branchId: 'branch_1', overall: 0.74 },
          { branchId: 'branch_2', overall: 0.55 },
        ]),
        makeRefineResponse(2.0, 30000),
      ]);

      const planner = new MissionPlanner(basePlannerConfig(llmCaller));
      const result = await planner.plan('Test goal', { tools: [], providers: ['openai'] });

      expect(result.selectedBranch.branchId).toBe('branch_1');
      expect(result.selectedBranch.scores.overall).toBe(0.74);
    });
  });

  describe('Phase 3: Refinement', () => {
    it('applies refinements from the reflexion pass', async () => {
      // Refinement reply that asks for one extra node to be added to the graph.
      const refineWithAddition = JSON.stringify({
        refinements: [
          {
            type: 'add_node',
            description: 'Added fact checker',
            nodeId: 'fact_checker',
            patch: {
              id: 'fact_checker',
              type: 'gmi',
              executorConfig: { type: 'gmi', instructions: 'Verify claims' },
              executionMode: 'single_turn',
              effectClass: 'read',
              checkpoint: true,
            },
          },
        ],
        toolGaps: [],
        finalEstimatedCost: 1.5,
        finalEstimatedLatencyMs: 90000,
      });

      const llmCaller = createMockLlmCaller([
        makeBranchResponse('linear', 'Linear'),
        makeBranchResponse('linear', 'Linear'),
        makeBranchResponse('linear', 'Linear'),
        makeEvalResponse('branch_0', [{ branchId: 'branch_0', overall: 0.7 }]),
        refineWithAddition,
      ]);

      const planner = new MissionPlanner(basePlannerConfig(llmCaller));
      const result = await planner.plan('Test', { tools: [], providers: ['openai'] });

      expect(result.refinements).toContain('Added fact checker');
      expect(result.compiledGraph.nodes.find((n) => n.id === 'fact_checker')).toBeDefined();
    });
  });

  describe('Event streaming', () => {
    it('emits planning events in order', async () => {
      const branch = makeBranchResponse('linear', 'Sequential');
      const llmCaller = createMockLlmCaller([
        branch,
        branch,
        branch,
        makeEvalResponse('branch_0', [{ branchId: 'branch_0', overall: 0.75 }]),
        makeRefineResponse(),
      ]);

      const events: Array<{ type: string }> = [];
      const planner = new MissionPlanner(basePlannerConfig(llmCaller));
      await planner.plan('Test', { tools: [], providers: ['openai'] }, (e) => events.push(e));

      // Only the first event's position is pinned; the remaining checks assert
      // presence, not relative order.
      const types = events.map((e) => e.type);
      expect(types[0]).toBe('mission:planning_start');
      expect(types).toContain('mission:branch_generated');
      expect(types).toContain('mission:branch_selected');
      expect(types).toContain('mission:graph_compiled');
    });
  });
});

0 commit comments

Comments
 (0)