Skip to content

Commit 3e176cc

Browse files
committed
feat(orchestration): add judgeNode builder for LLM-as-judge evaluation
1 parent 6c79487 commit 3e176cc

3 files changed

Lines changed: 88 additions & 1 deletion

File tree

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { z } from 'zod';
3+
import { judgeNode } from '../builders/nodes.js';
4+
5+
describe('judgeNode', () => {
  /**
   * Returns the instructions carried by a judge node's executor config.
   *
   * Throws when the executor config is not the `gmi` variant, so a test that
   * inspects the instructions can never pass vacuously by skipping its
   * assertions inside an `if` guard.
   */
  const instructionsOf = (node: ReturnType<typeof judgeNode>) => {
    const executorConfig = node.executorConfig;
    // Capture the discriminant before narrowing so it can be reported on failure.
    const kind: string = executorConfig.type;
    if (executorConfig.type !== 'gmi') {
      throw new Error(`expected a 'gmi' executor config, got '${kind}'`);
    }
    return executorConfig.instructions;
  };

  it('creates a gmi node with judge instructions', () => {
    const node = judgeNode({
      rubric: 'Score accuracy (1-10) and credibility (1-10)',
      schema: z.object({ accuracy: z.number(), credibility: z.number() }),
    });

    expect(node.type).toBe('gmi');
    expect(node.executionMode).toBe('single_turn');
    expect(node.executorConfig.type).toBe('gmi');

    const instructions = instructionsOf(node);
    expect(instructions).toContain('evaluation judge');
    expect(instructions).toContain('Score accuracy');
  });

  it('includes threshold in instructions', () => {
    const node = judgeNode({
      rubric: 'Score quality 1-10',
      schema: z.object({ quality: z.number() }),
      threshold: 7,
    });

    const instructions = instructionsOf(node);
    expect(instructions).toContain('Pass Threshold');
    // Match the whole phrase: a bare '7' could be satisfied by unrelated text.
    expect(instructions).toContain('A score of 7 or higher');
  });

  it('omits the threshold section when no threshold is given', () => {
    const node = judgeNode({
      rubric: 'Score quality 1-10',
      schema: z.object({ quality: z.number() }),
    });

    expect(instructionsOf(node)).not.toContain('Pass Threshold');
  });

  it('sets outputSchema from Zod schema', () => {
    const node = judgeNode({
      rubric: 'Rate it',
      schema: z.object({ score: z.number() }),
    });

    expect(node.outputSchema).toBeDefined();
  });

  it('generates unique ID with judge prefix', () => {
    const a = judgeNode({ rubric: 'r', schema: z.object({}) });
    const b = judgeNode({ rubric: 'r', schema: z.object({}) });

    expect(a.id).not.toBe(b.id);
    expect(a.id).toMatch(/judge/);
  });
});

src/orchestration/builders/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
export { gmiNode, toolNode, humanNode, routerNode, guardrailNode, subgraphNode } from './nodes.js';
1+
export { gmiNode, toolNode, humanNode, routerNode, guardrailNode, subgraphNode, judgeNode } from './nodes.js';
22
export type { NodePolicies } from './nodes.js';
33
export { AgentGraph, CompiledAgentGraph } from './AgentGraph.js';
44
export { workflow, WorkflowBuilder, CompiledWorkflow } from './WorkflowBuilder.js';

src/orchestration/builders/nodes.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import type { GraphNode, GraphCondition, NodeExecutionMode, EffectClass, MemoryPolicy, DiscoveryPolicy, PersonaPolicy, GuardrailPolicy, RetryPolicy, CompiledExecutionGraph } from '../ir/types.js';
2+
import { lowerZodToJsonSchema } from '../compiler/SchemaLowering.js';
23

34
export interface NodePolicies {
45
memory?: MemoryPolicy;
@@ -114,6 +115,46 @@ export function guardrailNode(guardrailIds: string[], config: {
114115
};
115116
}
116117

118+
/**
 * Creates an LLM-as-judge evaluation node with structured rubric output.
 *
 * The judge is built on top of `gmiNode` with `executionMode: 'single_turn'`;
 * the returned node reuses every field of that base node, replaces its `id`
 * with one generated from the `'judge'` prefix, and attaches an `outputSchema`
 * lowered from the supplied Zod schema.
 *
 * @param config.rubric - Evaluation criteria description, embedded verbatim
 *   under the "## Rubric" heading of the judge instructions.
 * @param config.schema - Zod schema for structured score output; passed to
 *   `lowerZodToJsonSchema` to produce the node's `outputSchema`.
 * @param config.threshold - Optional minimum passing score per dimension.
 *   Any finite number (including `0`) adds a "## Pass Threshold" section to
 *   the instructions; when omitted, no such section is emitted.
 * @param config.model - Optional model override for the judge LLM.
 *   NOTE(review): this value is accepted but not read anywhere in this
 *   function, so it currently has no effect here — confirm whether `gmiNode`
 *   is expected to receive it.
 * @param policies - Optional node policies, forwarded unchanged to `gmiNode`.
 * @returns The judge graph node.
 */
export function judgeNode(config: {
  rubric: string;
  schema: any;
  threshold?: number;
  model?: string;
}, policies?: NodePolicies): GraphNode {
  // Test for a real number rather than truthiness: a plain `config.threshold ? …`
  // check would silently drop an explicit threshold of 0.
  const thresholdSection = Number.isFinite(config.threshold)
    ? `\n## Pass Threshold\nA score of ${config.threshold} or higher on each dimension is required to pass.`
    : '';

  const instructions = [
    'You are an evaluation judge. Your task is to score content against a rubric.',
    '',
    '## Rubric',
    config.rubric,
    '',
    '## Instructions',
    '1. Read the content in the conversation carefully.',
    '2. Score each dimension in the rubric on a scale of 1-10.',
    '3. Respond with ONLY a JSON object matching the required schema.',
    '4. Do not include any other text, explanation, or commentary.',
    // Kept as the final element even when empty so the joined text is unchanged
    // from earlier behavior (it ends with a newline when there is no threshold).
    thresholdSection,
  ].join('\n');

  const base = gmiNode({ instructions, executionMode: 'single_turn' }, policies);

  return {
    ...base,
    // Override the id produced for the base node so judge nodes are identifiable.
    id: nextId('judge'),
    outputSchema: lowerZodToJsonSchema(config.schema),
  };
}
157+
117158
export function subgraphNode(compiledGraph: CompiledExecutionGraph, config?: {
118159
inputMapping?: Record<string, string>;
119160
outputMapping?: Record<string, string>;

0 commit comments

Comments
 (0)