Skip to content

Commit dd89dbe

Browse files
committed
feat: integration test + barrel exports for mission orchestrator
366 tests pass across 27 files. Integration test covers full pipeline: plan (Tree of Thought) → assign (balanced strategy) → expand (guardrailed) → tools (request_expansion, manage_graph).
1 parent 0de273c commit dd89dbe

2 files changed

Lines changed: 271 additions & 0 deletions

File tree

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
/**
2+
* @file mission-integration.test.ts
3+
* @description End-to-end integration tests: the plan → assign → expand pipeline, the request_expansion / manage_graph tools, and guardrail threshold blocking.
4+
*/
5+
import { describe, it, expect, vi } from 'vitest';
6+
import { MissionPlanner } from '../planning/MissionPlanner.js';
7+
import { ProviderAssignmentEngine } from '../planning/ProviderAssignmentEngine.js';
8+
import { GraphExpander } from '../planning/GraphExpander.js';
9+
import { DEFAULT_THRESHOLDS } from '../planning/types.js';
10+
import type { PlannerConfig, GraphPatch } from '../planning/types.js';
11+
import { RequestExpansionTool } from '../tools/RequestExpansionTool.js';
12+
import { ManageGraphTool } from '../tools/ManageGraphTool.js';
13+
14+
describe('Mission Orchestrator Integration', () => {
15+
// Happy-path integration test, exercised in four stages inside one test body:
//   1. MissionPlanner.plan() driven by a scripted LLM caller,
//   2. ProviderAssignmentEngine.assign() followed by checkAvailability(),
//   3. GraphExpander.shouldAutoApprove() followed by applyPatch(),
//   4. RequestExpansionTool.execute() and ManageGraphTool.execute().
// The only test double created here is the `llmCaller` mock; everything else is the imported class.
it('plans a mission, assigns providers, expands the graph, and tools work', async () => {
16+
// -----------------------------------------------------------------------
17+
// Mock LLM responses for the three planning phases
18+
// -----------------------------------------------------------------------
19+
20+
// Canned branch payload: two researcher nodes that both feed a single merger node,
// wired with five static edges (__START__ -> researchers -> merger -> __END__).
const branchResponse = JSON.stringify({
21+
strategy: 'parallel',
22+
summary: 'Parallel research with merge',
23+
nodes: [
24+
{
25+
id: 'researcher_1',
26+
type: 'gmi',
27+
role: 'Researcher 1',
28+
executorConfig: { type: 'gmi', instructions: 'Research topic A' },
29+
complexity: 0.7,
30+
estimatedTokens: 2000,
31+
},
32+
{
33+
id: 'researcher_2',
34+
type: 'gmi',
35+
role: 'Researcher 2',
36+
executorConfig: { type: 'gmi', instructions: 'Research topic B' },
37+
complexity: 0.7,
38+
estimatedTokens: 2000,
39+
},
40+
{
41+
id: 'merger',
42+
type: 'gmi',
43+
role: 'Merger',
44+
executorConfig: { type: 'gmi', instructions: 'Merge findings' },
45+
complexity: 0.3,
46+
estimatedTokens: 500,
47+
},
48+
],
49+
edges: [
50+
{ source: '__START__', target: 'researcher_1', type: 'static' },
51+
{ source: '__START__', target: 'researcher_2', type: 'static' },
52+
{ source: 'researcher_1', target: 'merger', type: 'static' },
53+
{ source: 'researcher_2', target: 'merger', type: 'static' },
54+
{ source: 'merger', target: '__END__', type: 'static' },
55+
],
56+
estimatedCost: 2.0,
57+
estimatedLatencyMs: 90000,
58+
});
59+
60+
// Canned evaluation payload: scores a single branch ('branch_0') and recommends it.
const evalResponse = JSON.stringify({
61+
evaluations: [
62+
{
63+
branchId: 'branch_0',
64+
scores: {
65+
feasibility: 0.9,
66+
costEfficiency: 0.7,
67+
latency: 0.8,
68+
robustness: 0.6,
69+
overall: 0.75,
70+
},
71+
reasoning: 'Good parallel approach',
72+
},
73+
],
74+
recommendation: { selectedBranchId: 'branch_0', reason: 'Only candidate' },
75+
});
76+
77+
// Canned refinement payload: no refinements, no tool gaps; the final cost and
// latency figures equal the estimates in branchResponse.
const refineResponse = JSON.stringify({
78+
refinements: [],
79+
toolGaps: [],
80+
finalEstimatedCost: 2.0,
81+
finalEstimatedLatencyMs: 90000,
82+
});
83+
84+
// Scripted LLM: each call returns the next entry of `responses`
// (branch x3, then eval, then refine). Once the script is exhausted the
// `?? '{}'` fallback returns an empty JSON object string.
// NOTE(review): the three branch entries presumably correspond to
// `branchCount: 3` below — confirm against MissionPlanner.
let callIndex = 0;
85+
const llmCaller = vi.fn(async () => {
86+
const responses = [
87+
branchResponse,
88+
branchResponse,
89+
branchResponse,
90+
evalResponse,
91+
refineResponse,
92+
];
93+
return responses[callIndex++] ?? '{}';
94+
});
95+
96+
// Planner configuration; thresholds are a shallow copy of DEFAULT_THRESHOLDS.
const plannerConfig: PlannerConfig = {
97+
branchCount: 3,
98+
autonomy: 'guardrailed',
99+
providerStrategy: { strategy: 'balanced' },
100+
thresholds: { ...DEFAULT_THRESHOLDS },
101+
costCap: 10.0,
102+
maxAgents: 10,
103+
maxToolForges: 5,
104+
maxExpansions: 8,
105+
maxDepth: 3,
106+
reevalInterval: 3,
107+
llmCaller,
108+
};
109+
110+
// -----------------------------------------------------------------------
111+
// Phase 1-3: Plan
112+
// -----------------------------------------------------------------------
113+
114+
const planner = new MissionPlanner(plannerConfig);
115+
// Collects every event handed to the callback passed as plan()'s third
// argument so that emitted event types can be asserted afterwards.
const events: Array<{ type: string }> = [];
116+
const result = await planner.plan(
117+
'Research AI frameworks and compare them',
118+
{
119+
tools: [{ name: 'web_search', description: 'Search the web' }],
120+
providers: ['openai', 'anthropic'],
121+
},
122+
(e) => events.push(e),
123+
);
124+
125+
// Expected node count equals the three nodes declared in branchResponse.
expect(result.compiledGraph.nodes.length).toBe(3);
126+
expect(events.some((e) => e.type === 'mission:planning_start')).toBe(true);
127+
expect(events.some((e) => e.type === 'mission:graph_compiled')).toBe(true);
128+
129+
// -----------------------------------------------------------------------
130+
// Provider assignment
131+
// -----------------------------------------------------------------------
132+
133+
const engine = new ProviderAssignmentEngine(['openai', 'anthropic']);
134+
// Complexity is set by hand on each compiled node before assignment:
// 0.2 for 'merger', 0.7 for every other node.
const nodesWithComplexity = result.compiledGraph.nodes.map((n) => ({
135+
...n,
136+
complexity: n.id === 'merger' ? 0.2 : 0.7,
137+
}));
138+
const assignments = engine.assign(nodesWithComplexity, { strategy: 'balanced' });
139+
140+
// NOTE(review): the expected model names below depend on how
// ProviderAssignmentEngine maps complexity to models under the 'balanced'
// strategy, which is not visible in this file.
expect(assignments).toHaveLength(3);
141+
// Non-null assertion: if no assignment with this nodeId exists, the following
// property access throws a TypeError rather than producing a failed expect().
const merger = assignments.find((a) => a.nodeId === 'merger')!;
142+
expect(merger.model).toBe('gpt-4o-mini');
143+
144+
const researcher = assignments.find((a) => a.nodeId === 'researcher_1')!;
145+
expect(researcher.model).toBe('gpt-4o');
146+
147+
// Availability check
148+
const availability = engine.checkAvailability(assignments);
149+
expect(availability.available).toBe(true);
150+
151+
// -----------------------------------------------------------------------
152+
// Graph expansion
153+
// -----------------------------------------------------------------------
154+
155+
const expander = new GraphExpander({ ...DEFAULT_THRESHOLDS });
156+
// Patch adds one node ('fact_checker') and one static edge from 'merger' to it.
const patch: GraphPatch = {
157+
addNodes: [
158+
{
159+
id: 'fact_checker',
160+
type: 'gmi',
161+
executorConfig: { type: 'gmi', instructions: 'Verify claims' },
162+
executionMode: 'single_turn',
163+
effectClass: 'read',
164+
checkpoint: true,
165+
},
166+
],
167+
addEdges: [{ id: 'e_new', source: 'merger', target: 'fact_checker', type: 'static' }],
168+
reason: 'Need fact verification after merge',
169+
estimatedCostDelta: 0.5,
170+
estimatedLatencyDelta: 30000,
171+
};
172+
173+
// Guardrailed mode: should auto-approve (below all thresholds)
// The deltas passed here (0.5 cost, 1 agent) mirror the patch above but are
// written out by hand; they are not derived from `patch`.
174+
const shouldApprove = expander.shouldAutoApprove('guardrailed', {
175+
currentCost: 2.0,
176+
currentAgentCount: 3,
177+
currentExpansions: 0,
178+
currentToolForges: 0,
179+
patchCostDelta: 0.5,
180+
patchAgentDelta: 1,
181+
});
182+
expect(shouldApprove).toBe(true);
183+
184+
// Applying the patch to the planned graph should yield 3 + 1 nodes and the new edge.
const expanded = expander.applyPatch(result.compiledGraph, patch);
185+
expect(expanded.nodes.length).toBe(4);
186+
expect(expanded.nodes.find((n) => n.id === 'fact_checker')).toBeDefined();
187+
expect(expanded.edges.find((e) => e.source === 'merger' && e.target === 'fact_checker')).toBeDefined();
188+
189+
// -----------------------------------------------------------------------
190+
// Expansion tools
191+
// -----------------------------------------------------------------------
192+
193+
// Both tools are constructed without arguments and invoked with a stub
// execution context; `userContext` is an empty object cast to `any`.
// NOTE(review): presumably neither tool reads userContext — confirm.
const requestTool = new RequestExpansionTool();
194+
const requestResult = await requestTool.execute(
195+
{ gmiId: 'gmi-1', personaId: 'p-1', userContext: {} as any },
196+
{ need: 'Web scraper for changelog parsing', urgency: 'blocking' },
197+
);
198+
expect(requestResult.success).toBe(true);
199+
expect(requestResult.output?.acknowledged).toBe(true);
200+
201+
const manageTool = new ManageGraphTool();
202+
const manageResult = await manageTool.execute(
203+
{ gmiId: 'gmi-1', personaId: 'p-1', userContext: {} as any },
204+
{
205+
action: 'spawn_agent',
206+
spec: { role: 'fact_checker', instructions: 'Verify all claims' },
207+
reason: 'Quality assurance needed',
208+
},
209+
);
210+
expect(manageResult.success).toBe(true);
211+
expect(manageResult.output?.acknowledged).toBe(true);
212+
});
213+
214+
// Guardrail checks on a GraphExpander whose maxTotalCost is lowered to 5.0 and
// maxAgentCount to 4; all other thresholds come from DEFAULT_THRESHOLDS.
// Covers three cases: cost over cap, agent count over cap, and 'autonomous' mode.
it('blocks expansion when guardrail thresholds are exceeded', () => {
215+
const expander = new GraphExpander({
216+
...DEFAULT_THRESHOLDS,
217+
maxTotalCost: 5.0,
218+
maxAgentCount: 4,
219+
});
220+
221+
// Cost exceeds cap
// 4.8 current + 0.5 delta = 5.3, which is above the 5.0 maxTotalCost set above.
222+
expect(
223+
expander.shouldAutoApprove('guardrailed', {
224+
currentCost: 4.8,
225+
currentAgentCount: 2,
226+
currentExpansions: 0,
227+
currentToolForges: 0,
228+
patchCostDelta: 0.5,
229+
patchAgentDelta: 1,
230+
}),
231+
).toBe(false);
232+
233+
// Same state as the check above; here the name of the violated threshold is asserted.
const exceeded = expander.getExceededThreshold({
234+
currentCost: 4.8,
235+
currentAgentCount: 2,
236+
currentExpansions: 0,
237+
currentToolForges: 0,
238+
patchCostDelta: 0.5,
239+
patchAgentDelta: 1,
240+
});
241+
expect(exceeded?.threshold).toBe('maxTotalCost');
242+
243+
// Agent count exceeds cap
// 4 current + 1 delta = 5, which is above the maxAgentCount of 4 set above.
// Unlike the cost case, the name of the exceeded threshold is not asserted here.
244+
expect(
245+
expander.shouldAutoApprove('guardrailed', {
246+
currentCost: 1.0,
247+
currentAgentCount: 4,
248+
currentExpansions: 0,
249+
currentToolForges: 0,
250+
patchCostDelta: 0.5,
251+
patchAgentDelta: 1,
252+
}),
253+
).toBe(false);
254+
255+
// Autonomous mode ignores thresholds
// Every field is set to 999, far beyond the caps configured above, and approval is still expected.
256+
expect(
257+
expander.shouldAutoApprove('autonomous', {
258+
currentCost: 999,
259+
currentAgentCount: 999,
260+
currentExpansions: 999,
261+
currentToolForges: 999,
262+
patchCostDelta: 999,
263+
patchAgentDelta: 999,
264+
}),
265+
).toBe(true);
266+
});
267+
});

src/orchestration/index.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,7 @@ export * from './runtime/index.js';
2424
export * from './builders/index.js';
2525
// Compiler (advanced use)
2626
export * from './compiler/index.js';
27+
// Planning (self-expanding mission orchestrator)
28+
export * from './planning/index.js';
29+
// Orchestration tools (request_expansion, manage_graph)
30+
export * from './tools/index.js';

0 commit comments

Comments
 (0)