feat(orchestration): add judgeNode builder for LLM-as-judge evaluation

jddunn · jddunn · commit 3e176ccd0972 · 2026-03-23T22:45:33.000-07:00
diff --git a/src/orchestration/__tests__/judge-node.test.ts b/src/orchestration/__tests__/judge-node.test.ts
@@ -0,0 +1,46 @@
+import { describe, it, expect } from 'vitest';
+import { z } from 'zod';
+import { judgeNode } from '../builders/nodes.js';
+
+describe('judgeNode', () => {
+  it('creates a gmi node with judge instructions', () => {
+    const node = judgeNode({
+      rubric: 'Score accuracy (1-10) and credibility (1-10)',
+      schema: z.object({ accuracy: z.number(), credibility: z.number() }),
+    });
+    expect(node.type).toBe('gmi');
+    expect(node.executionMode).toBe('single_turn');
+    expect(node.executorConfig.type).toBe('gmi');
+    if (node.executorConfig.type === 'gmi') {
+      expect(node.executorConfig.instructions).toContain('evaluation judge');
+      expect(node.executorConfig.instructions).toContain('Score accuracy');
+    }
+  });
+
+  it('includes threshold in instructions', () => {
+    const node = judgeNode({
+      rubric: 'Score quality 1-10',
+      schema: z.object({ quality: z.number() }),
+      threshold: 7,
+    });
+    // Throw (rather than skip) on a non-gmi executor so this test cannot pass vacuously.
+    if (node.executorConfig.type !== 'gmi') throw new Error('expected a gmi executor');
+    expect(node.executorConfig.instructions).toContain('A score of 7 or higher');
+    expect(node.executorConfig.instructions).toContain('Pass Threshold');
+  });
+
+  it('sets outputSchema from Zod schema', () => {
+    const node = judgeNode({
+      rubric: 'Rate it',
+      schema: z.object({ score: z.number() }),
+    });
+    expect(node.outputSchema).toBeDefined();
+  });
+
+  it('generates unique ID with judge prefix', () => {
+    const a = judgeNode({ rubric: 'r', schema: z.object({}) });
+    const b = judgeNode({ rubric: 'r', schema: z.object({}) });
+    expect(a.id).not.toBe(b.id);
+    expect(a.id).toMatch(/judge/);
+  });
+});
diff --git a/src/orchestration/builders/index.ts b/src/orchestration/builders/index.ts
@@ -1,4 +1,4 @@
-export { gmiNode, toolNode, humanNode, routerNode, guardrailNode, subgraphNode } from './nodes.js';
+export { gmiNode, toolNode, humanNode, routerNode, guardrailNode, subgraphNode, judgeNode } from './nodes.js';
 export type { NodePolicies } from './nodes.js';
 export { AgentGraph, CompiledAgentGraph } from './AgentGraph.js';
 export { workflow, WorkflowBuilder, CompiledWorkflow } from './WorkflowBuilder.js';
diff --git a/src/orchestration/builders/nodes.ts b/src/orchestration/builders/nodes.ts
@@ -1,4 +1,5 @@
 import type { GraphNode, GraphCondition, NodeExecutionMode, EffectClass, MemoryPolicy, DiscoveryPolicy, PersonaPolicy, GuardrailPolicy, RetryPolicy, CompiledExecutionGraph } from '../ir/types.js';
+import { lowerZodToJsonSchema } from '../compiler/SchemaLowering.js';
 
 export interface NodePolicies {
   memory?: MemoryPolicy;
@@ -114,6 +115,46 @@ export function guardrailNode(guardrailIds: string[], config: {
   };
 }
 
+/**
+ * Creates an LLM-as-judge evaluation node: a gmiNode built with single_turn execution whose
+ * id comes from nextId('judge') and whose outputSchema is lowerZodToJsonSchema(config.schema).
+ * @param config.rubric - Evaluation criteria, embedded verbatim under the "## Rubric" heading
+ * @param config.schema - Zod schema for structured score output
+ * @param config.threshold - Optional minimum passing score per dimension (an explicit 0 is kept)
+ * @param config.model - Optional model override; NOTE(review): not read by this function yet, confirm wiring
+ * @param policies - Optional node policies, passed through unchanged to gmiNode
+ */
+export function judgeNode(config: {
+  rubric: string;
+  schema: any;
+  threshold?: number;
+  model?: string;
+}, policies?: NodePolicies): GraphNode {
+  const instructions = [
+    'You are an evaluation judge. Your task is to score content against a rubric.',
+    '',
+    '## Rubric',
+    config.rubric,
+    '',
+    '## Instructions',
+    '1. Read the content in the conversation carefully.',
+    '2. Score each dimension in the rubric on a scale of 1-10.',
+    '3. Respond with ONLY a JSON object matching the required schema.',
+    '4. Do not include any other text, explanation, or commentary.',
+    config.threshold !== undefined // explicit check: a truthiness test would drop a threshold of 0
+      ? `\n## Pass Threshold\nA score of ${config.threshold} or higher on each dimension is required to pass.`
+      : '',
+  ].join('\n');
+
+  const base = gmiNode({ instructions, executionMode: 'single_turn' }, policies);
+
+  return {
+    ...base,
+    id: nextId('judge'),
+    outputSchema: lowerZodToJsonSchema(config.schema),
+  };
+}
+
 export function subgraphNode(compiledGraph: CompiledExecutionGraph, config?: {
   inputMapping?: Record<string, string>;
   outputMapping?: Record<string, string>;

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-export { gmiNode, toolNode, humanNode, routerNode, guardrailNode, subgraphNode } from './nodes.js';`
	`1`	`+export { gmiNode, toolNode, humanNode, routerNode, guardrailNode, subgraphNode, judgeNode } from './nodes.js';`
`2`	`2`	`export type { NodePolicies } from './nodes.js';`
`3`	`3`	`export { AgentGraph, CompiledAgentGraph } from './AgentGraph.js';`
`4`	`4`	`export { workflow, WorkflowBuilder, CompiledWorkflow } from './WorkflowBuilder.js';`