Skip to content

Commit 494fbe4

Browse files
committed
feat(strategies): accumulate costUSD/cache tokens, render debate as judge verdict
Strategy compilers (sequential/parallel/debate/graph/review-loop/hierarchical) were initialising totalUsage with only promptTokens/completionTokens/totalTokens, so costUSD/cacheReadTokens/cacheCreationTokens were always undefined on agency.execute() and .stream() results. Hierarchical also discarded sub-agent usage entirely (only the manager turn was reported).

- Replace accumulateCacheTokens with accumulateExtraUsage (forwards costUSD too) and add buildAgentCallUsage so per-call records keep cost + cache visibility down to AgentCallRecord.usage. Old name kept as a deprecated alias.
- Hierarchical now sums sub-agent usage into a separate accumulator and merges it with the manager's usage at the end.
- Widen AgentCallRecord.usage / CompiledStrategyStreamResult.usage / AgencyStreamResult.usage to expose cacheReadTokens + cacheCreationTokens (TokenUsage already had them; the strategy layer was the gap).

Note for OpenAI: per-call cache tokens still come back undefined because the API does not expose them at that layer. Anthropic returns them and they will now propagate.

Debate strategy was producing a balanced essay because the per-turn prompt told agents to "present your perspective" and the synthesis prompt asked for "a single coherent answer". Reframed:

- Per-turn: opening statement (no hedging, take a side) for the first agent; rebuttal turns (quote + attack the opposing claim, advance one new point, ~150 word cap) for the rest.
- Synthesis: the agency-level model is the JUDGE, rendering a structured verdict (Verdict / Why / strongest point per side / what was conceded). Explicitly forbid "on the other hand" framing.

Updates the agency.test.ts expectations to assert the new prompt shape.
1 parent 404e328 commit 494fbe4

9 files changed

Lines changed: 405 additions & 167 deletions

File tree

src/api/runtime/__tests__/agency.test.ts

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -690,16 +690,18 @@ describe('Debate Strategy', () => {
690690
const strategy = compileDebate({ a: agentA, b: agentB }, agencyConfig);
691691
await strategy.execute('topic');
692692

693-
/* First agent in round 1 sees "You are the first to argue." */
693+
/* First agent in round 1 sees the OPENING-statement framing. */
694694
const aInput = (agentA.generate as ReturnType<typeof vi.fn>).mock.calls[0][0] as string;
695-
expect(aInput).toContain('You are the first to argue.');
696-
expect(aInput).toContain('Present your perspective as a');
695+
expect(aInput).toContain('Debate motion: topic');
696+
expect(aInput).toContain('OPENING statement');
697+
expect(aInput).toContain('You are "a"');
697698

698-
/* Second agent in round 1 sees agent A's argument. */
699+
/* Second agent in round 1 sees agent A's argument under the rebuttal framing. */
699700
const bInput = (agentB.generate as ReturnType<typeof vi.fn>).mock.calls[0][0] as string;
700-
expect(bInput).toContain('Previous arguments:');
701+
expect(bInput).toContain('Transcript of arguments so far');
701702
expect(bInput).toContain('[a, round 1]: A perspective');
702-
expect(bInput).toContain('Present your perspective as b');
703+
expect(bInput).toContain('You are "b"');
704+
expect(bInput).toContain('REBUTTAL turn');
703705
});
704706

705707
it('respects maxRounds configuration', async () => {
@@ -764,11 +766,14 @@ describe('Debate Strategy', () => {
764766
const strategy = compileDebate({ a: agentA, b: agentB }, agencyConfig);
765767
const result = (await strategy.execute('topic')) as { text: string };
766768

767-
/* The synthesis prompt includes all collected arguments. */
769+
/* The synthesis prompt includes all collected arguments and is framed
770+
as a JUDGE rendering a verdict (not a balanced summary). */
768771
const synthInput = hoisted.agentGenerate.mock.calls[0][0] as string;
769772
expect(synthInput).toContain('[a, round 1]: thesis');
770773
expect(synthInput).toContain('[b, round 1]: antithesis');
771-
expect(synthInput).toContain('Synthesize these perspectives');
774+
expect(synthInput).toContain('You are the JUDGE');
775+
expect(synthInput).toContain('**Verdict:**');
776+
expect(synthInput).toContain('Pick a winner');
772777

773778
expect(result.text).toBe('synthesis of debate');
774779
});

src/api/runtime/strategies/debate.ts

Lines changed: 91 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,30 @@ import type {
3434
AgentCallRecord,
3535
} from '../types.js';
3636
import { AgencyConfigError } from '../types.js';
37-
import { isAgent, mergeDefaults, resolveAgent, checkBeforeAgent, accumulateCacheTokens } from './shared.js';
37+
import {
38+
resolveAgent,
39+
checkBeforeAgent,
40+
accumulateExtraUsage,
41+
buildAgentCallUsage,
42+
} from './shared.js';
43+
44+
type StrategyTotalUsage = {
45+
promptTokens: number;
46+
completionTokens: number;
47+
totalTokens: number;
48+
costUSD?: number;
49+
cacheReadTokens?: number;
50+
cacheCreationTokens?: number;
51+
};
52+
53+
type ResultUsageSnapshot = {
54+
promptTokens?: number;
55+
completionTokens?: number;
56+
totalTokens?: number;
57+
costUSD?: number;
58+
cacheReadTokens?: number;
59+
cacheCreationTokens?: number;
60+
};
3861

3962
/**
4063
* Compiles a debate execution strategy.
@@ -78,7 +101,7 @@ export function compileDebate(
78101
async execute(prompt, opts) {
79102
const agentCalls: AgentCallRecord[] = [];
80103
const entries = Object.entries(agents);
81-
const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
104+
const totalUsage: StrategyTotalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
82105
const collectedArguments: string[] = [];
83106

84107
for (let round = 0; round < maxRounds; round++) {
@@ -92,22 +115,45 @@ export function compileDebate(
92115

93116
const a = resolveAgent(agentOrConfig, agencyConfig);
94117

95-
// Build the debate context: original task + all prior arguments.
96-
// The first agent in the first round sees "You are the first to argue."
97-
// which prevents confusion about missing prior context.
98-
const debateContext =
99-
`Task: ${prompt}\n\n` +
100-
(collectedArguments.length > 0
101-
? `Previous arguments:\n${collectedArguments.join('\n---\n')}`
102-
: 'You are the first to argue.') +
103-
`\n\nPresent your perspective as ${name} (round ${round + 1}/${maxRounds}).`;
118+
// Build the debate context: original task + transcript of prior
119+
// arguments. The framing is intentionally adversarial — agents are
120+
// told to take a side and rebut, not to "present a perspective" or
121+
// produce a balanced summary. The synthesis step is what unifies
122+
// the back-and-forth; the per-agent turns must stay sharp.
123+
const isOpening = collectedArguments.length === 0;
124+
const turnLabel = `Round ${round + 1} of ${maxRounds}`;
125+
const debateContext = isOpening
126+
? [
127+
`Debate motion: ${prompt}`,
128+
``,
129+
`You are "${name}". This is the OPENING statement (${turnLabel}).`,
130+
`Take a clear, defensible side on the motion above. Do NOT hedge,`,
131+
`do NOT produce a balanced summary, and do NOT acknowledge counterpoints`,
132+
`that have not yet been raised. State your position in one sentence,`,
133+
`then back it with your strongest 2–4 reasons. Speak in the first person.`,
134+
].join('\n')
135+
: [
136+
`Debate motion: ${prompt}`,
137+
``,
138+
`Transcript of arguments so far:`,
139+
collectedArguments.join('\n---\n'),
140+
``,
141+
`You are "${name}" (${turnLabel}). This is a REBUTTAL turn.`,
142+
`1. Quote or paraphrase the SINGLE strongest opposing claim made above.`,
143+
`2. Attack it directly — name the flaw, the missing evidence, or the`,
144+
` counter-example. Be specific; do not restate generalities.`,
145+
`3. Advance ONE new point of your own that the other side has not`,
146+
` addressed. Do not repeat earlier rounds.`,
147+
`Stay in character as "${name}". Speak in the first person. Be terse:`,
148+
`at most ~150 words. Do not produce a neutral summary.`,
149+
].join('\n');
104150

105151
const start = Date.now();
106152
const result = (await a.generate(debateContext, opts)) as Record<string, unknown>;
107153
const durationMs = Date.now() - start;
108154

109155
const resultText = (result.text as string) ?? '';
110-
const resultUsage = (result.usage as { promptTokens?: number; completionTokens?: number; totalTokens?: number }) ?? {};
156+
const resultUsage = (result.usage as ResultUsageSnapshot) ?? {};
111157
const resultToolCalls = (result.toolCalls as Array<{ name: string; args: unknown; result?: unknown; error?: string }>) ?? [];
112158

113159
// Label each argument with the agent name and round for traceability
@@ -119,30 +165,48 @@ export function compileDebate(
119165
input: debateContext,
120166
output: resultText,
121167
toolCalls: resultToolCalls,
122-
usage: {
123-
promptTokens: resultUsage.promptTokens ?? 0,
124-
completionTokens: resultUsage.completionTokens ?? 0,
125-
totalTokens: resultUsage.totalTokens ?? 0,
126-
},
168+
usage: buildAgentCallUsage(resultUsage),
127169
durationMs,
128170
});
129171

130172
totalUsage.promptTokens += resultUsage.promptTokens ?? 0;
131173
totalUsage.completionTokens += resultUsage.completionTokens ?? 0;
132174
totalUsage.totalTokens += resultUsage.totalTokens ?? 0;
133-
accumulateCacheTokens(totalUsage, resultUsage);
175+
accumulateExtraUsage(totalUsage, resultUsage);
134176
}
135177
}
136178

137-
// Synthesize all arguments into a final answer using the agency-level model.
179+
// Synthesize all arguments into a JUDGE'S VERDICT, not a balanced
180+
// essay. The synthesis is framed as adjudication: render a verdict,
181+
// explain which side carried the debate, and quote the decisive
182+
// arguments. Caller-supplied `instructions` are appended verbatim so
183+
// they can override or extend the verdict format.
138184
const synthInstructions = agencyConfig.instructions
139-
? `\n\n${agencyConfig.instructions}`
185+
? `\n\nAdditional instructions from the operator:\n${agencyConfig.instructions}`
140186
: '';
141187

142-
const synthPrompt =
143-
`A debate was held on the following task:\n"${prompt}"\n\n` +
144-
`All arguments:\n${collectedArguments.join('\n---\n')}\n\n` +
145-
`Synthesize these perspectives into a single coherent answer.${synthInstructions}`;
188+
const synthPrompt = [
189+
`You are the JUDGE of the following debate.`,
190+
``,
191+
`Motion:`,
192+
prompt,
193+
``,
194+
`Full transcript of the debate (each entry is one turn by one agent):`,
195+
collectedArguments.join('\n---\n'),
196+
``,
197+
`Render a verdict using exactly this structure:`,
198+
``,
199+
`**Verdict:** <one sentence: which side prevailed, or "split" if neither did>`,
200+
`**Why:** <2–4 sentences explaining which arguments were decisive and why>`,
201+
`**Strongest point for each side:**`,
202+
`- <agent name>: "<short quote or paraphrase of their best argument>"`,
203+
`- <agent name>: "<short quote or paraphrase of their best argument>"`,
204+
`**What was conceded or left unanswered:** <1–2 sentences>`,
205+
``,
206+
`Do NOT produce a balanced essay or "on the other hand" summary.`,
207+
`Do NOT introduce new arguments the agents did not make.`,
208+
`Be direct. Pick a winner unless the transcript is genuinely tied.${synthInstructions}`,
209+
].join('\n');
146210

147211
const synthesizer = createAgent({
148212
model: agencyConfig.model,
@@ -153,12 +217,12 @@ export function compileDebate(
153217
});
154218

155219
const synthesis = (await synthesizer.generate(synthPrompt, opts)) as unknown as Record<string, unknown>;
156-
const synthUsage = (synthesis.usage as { promptTokens?: number; completionTokens?: number; totalTokens?: number }) ?? {};
220+
const synthUsage = (synthesis.usage as ResultUsageSnapshot) ?? {};
157221

158222
totalUsage.promptTokens += synthUsage.promptTokens ?? 0;
159223
totalUsage.completionTokens += synthUsage.completionTokens ?? 0;
160224
totalUsage.totalTokens += synthUsage.totalTokens ?? 0;
161-
accumulateCacheTokens(totalUsage, synthUsage);
225+
accumulateExtraUsage(totalUsage, synthUsage);
162226

163227
return { ...synthesis, agentCalls, usage: totalUsage };
164228
},
@@ -181,12 +245,7 @@ export function compileDebate(
181245
yield { type: 'text' as const, text };
182246
})(),
183247
text: textPromise,
184-
usage: resultPromise.then((r) => r.usage as {
185-
promptTokens: number;
186-
completionTokens: number;
187-
totalTokens: number;
188-
costUSD?: number;
189-
}),
248+
usage: resultPromise.then((r) => r.usage as StrategyTotalUsage),
190249
agentCalls: resultPromise.then((r) => (r.agentCalls as AgentCallRecord[] | undefined) ?? []),
191250
};
192251
},

src/api/runtime/strategies/graph.ts

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,31 @@ import type {
3535
} from '../types.js';
3636
import { createBufferedAsyncReplay } from '../streamBuffer.js';
3737
import { AgencyConfigError } from '../types.js';
38-
import { isAgent, mergeDefaults, checkBeforeAgent, accumulateCacheTokens } from './shared.js';
38+
import {
39+
isAgent,
40+
mergeDefaults,
41+
checkBeforeAgent,
42+
accumulateExtraUsage,
43+
buildAgentCallUsage,
44+
} from './shared.js';
45+
46+
type StrategyTotalUsage = {
47+
promptTokens: number;
48+
completionTokens: number;
49+
totalTokens: number;
50+
costUSD?: number;
51+
cacheReadTokens?: number;
52+
cacheCreationTokens?: number;
53+
};
54+
55+
type ResultUsageSnapshot = {
56+
promptTokens?: number;
57+
completionTokens?: number;
58+
totalTokens?: number;
59+
costUSD?: number;
60+
cacheReadTokens?: number;
61+
cacheCreationTokens?: number;
62+
};
3963

4064
// ---------------------------------------------------------------------------
4165
// Topological sort
@@ -116,7 +140,7 @@ export function compileGraph(
116140
async execute(prompt, opts) {
117141
const agentCalls: AgentCallRecord[] = [];
118142
const outputs = new Map<string, string>();
119-
const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
143+
const totalUsage: StrategyTotalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
120144
let lastResult: Record<string, unknown> | null = null;
121145

122146
for (const tier of tiers) {
@@ -152,7 +176,7 @@ export function compileGraph(
152176
const durationMs = Date.now() - start;
153177

154178
const resultText = (result.text as string) ?? '';
155-
const resultUsage = (result.usage as { promptTokens?: number; completionTokens?: number; totalTokens?: number }) ?? {};
179+
const resultUsage = (result.usage as ResultUsageSnapshot) ?? {};
156180
const resultToolCalls = (result.toolCalls as Array<{ name: string; args: unknown; result?: unknown; error?: string }>) ?? [];
157181

158182
outputs.set(name, resultText);
@@ -162,18 +186,14 @@ export function compileGraph(
162186
input: context,
163187
output: resultText,
164188
toolCalls: resultToolCalls,
165-
usage: {
166-
promptTokens: resultUsage.promptTokens ?? 0,
167-
completionTokens: resultUsage.completionTokens ?? 0,
168-
totalTokens: resultUsage.totalTokens ?? 0,
169-
},
189+
usage: buildAgentCallUsage(resultUsage),
170190
durationMs,
171191
});
172192

173193
totalUsage.promptTokens += resultUsage.promptTokens ?? 0;
174194
totalUsage.completionTokens += resultUsage.completionTokens ?? 0;
175195
totalUsage.totalTokens += resultUsage.totalTokens ?? 0;
176-
accumulateCacheTokens(totalUsage, resultUsage);
196+
accumulateExtraUsage(totalUsage, resultUsage);
177197

178198
return result;
179199
}),
@@ -190,7 +210,7 @@ export function compileGraph(
190210
stream(prompt, opts) {
191211
const startMs = Date.now();
192212
const agentCalls: AgentCallRecord[] = [];
193-
const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
213+
const totalUsage: StrategyTotalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
194214

195215
async function* streamGenerator(): AsyncGenerator<AgencyStreamPart> {
196216
const outputs = new Map<string, string>();
@@ -227,13 +247,13 @@ export function compileGraph(
227247
: createAgent({ ...mergeDefaults(config, agencyConfig) });
228248

229249
let agentText = '';
230-
let resultUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
250+
let resultUsage: ResultUsageSnapshot = {};
231251
let resultToolCalls: Array<{ name: string; args: unknown; result?: unknown; error?: string }> = [];
232252
const agentStart = Date.now();
233253
const agentStream = a.stream(context, opts) as {
234254
textStream?: AsyncIterable<string>;
235255
text?: Promise<string>;
236-
usage?: Promise<{ promptTokens?: number; completionTokens?: number; totalTokens?: number }>;
256+
usage?: Promise<ResultUsageSnapshot>;
237257
toolCalls?: Promise<Array<{ name: string; args: unknown; result?: unknown; error?: string }>>;
238258
} | null;
239259

@@ -248,25 +268,15 @@ export function compileGraph(
248268
yield { type: 'text' as const, text: agentText, agent: name };
249269
}
250270
}
251-
const streamedUsage = (await Promise.resolve(agentStream.usage)) ?? {};
252-
resultUsage = {
253-
promptTokens: streamedUsage.promptTokens ?? 0,
254-
completionTokens: streamedUsage.completionTokens ?? 0,
255-
totalTokens: streamedUsage.totalTokens ?? 0,
256-
};
271+
resultUsage = (await Promise.resolve(agentStream.usage)) ?? {};
257272
resultToolCalls = (await Promise.resolve(agentStream.toolCalls)) ?? [];
258273
} else {
259274
const result = (await a.generate(context, opts)) as Record<string, unknown>;
260275
agentText = (result.text as string) ?? '';
261276
if (agentText) {
262277
yield { type: 'text' as const, text: agentText, agent: name };
263278
}
264-
const generatedUsage = (result.usage as { promptTokens?: number; completionTokens?: number; totalTokens?: number }) ?? {};
265-
resultUsage = {
266-
promptTokens: generatedUsage.promptTokens ?? 0,
267-
completionTokens: generatedUsage.completionTokens ?? 0,
268-
totalTokens: generatedUsage.totalTokens ?? 0,
269-
};
279+
resultUsage = (result.usage as ResultUsageSnapshot) ?? {};
270280
resultToolCalls = (result.toolCalls as Array<{ name: string; args: unknown; result?: unknown; error?: string }>) ?? [];
271281
}
272282

@@ -276,14 +286,14 @@ export function compileGraph(
276286
input: context,
277287
output: agentText,
278288
toolCalls: resultToolCalls,
279-
usage: resultUsage,
289+
usage: buildAgentCallUsage(resultUsage),
280290
durationMs: Date.now() - agentStart,
281291
});
282292

283-
totalUsage.promptTokens += resultUsage.promptTokens;
284-
totalUsage.completionTokens += resultUsage.completionTokens;
285-
totalUsage.totalTokens += resultUsage.totalTokens;
286-
accumulateCacheTokens(totalUsage, resultUsage);
293+
totalUsage.promptTokens += resultUsage.promptTokens ?? 0;
294+
totalUsage.completionTokens += resultUsage.completionTokens ?? 0;
295+
totalUsage.totalTokens += resultUsage.totalTokens ?? 0;
296+
accumulateExtraUsage(totalUsage, resultUsage);
287297

288298
yield {
289299
type: 'agent-end' as const,

0 commit comments

Comments
 (0)