Skip to content

Commit 9e468fa

Browse files
jddunn and claude committed
feat(grounding): add CheckGroundingTool for on-demand verification
Implements ITool<CheckGroundingInput, GroundingResult>, which lets agents explicitly verify claims against source documents. Wraps plain-string sources as synthetic RagRetrievedChunk objects (relevanceScore: 1.0), extracts claims via ClaimExtractor, and verifies them via GroundingChecker.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9659e41 commit 9e468fa

File tree

2 files changed

+598
-0
lines changed

2 files changed

+598
-0
lines changed
Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
/**
2+
* @file CheckGroundingTool.ts
3+
* @description On-demand grounding verification tool that allows agents and
4+
* workflows to explicitly check whether claims in a given text are supported
5+
* by a set of source documents.
6+
*
7+
* Unlike the {@link GroundingGuardrail} (which runs automatically in the
8+
* guardrail pipeline), this tool is invoked explicitly via a tool call,
9+
* enabling agents to self-verify their own output or check external content
10+
* before presenting it to the user.
11+
*
12+
* ### Pipeline
13+
* 1. Wrap plain-string sources as synthetic {@link RagRetrievedChunk} objects
14+
* with `relevanceScore: 1.0` (since the caller explicitly selected them).
15+
* 2. Extract claims from the input text via {@link ClaimExtractor}.
16+
* 3. Verify all claims against the synthetic sources via
17+
* {@link GroundingChecker.checkClaims}.
18+
* 4. Aggregate results into a {@link GroundingResult}.
19+
*
20+
* @module agentos/extensions/packs/grounding-guard/tools/CheckGroundingTool
21+
*/
22+
23+
import type {
24+
ITool,
25+
ToolExecutionContext,
26+
ToolExecutionResult,
27+
JSONSchemaObject,
28+
} from '../../../../core/tools/ITool';
29+
import type { RagRetrievedChunk } from '../../../../rag/IRetrievalAugmentor';
30+
import type { GroundingResult, ClaimVerification } from '../types';
31+
import type { ClaimExtractor } from '../ClaimExtractor';
32+
import type { GroundingChecker } from '../GroundingChecker';
33+
34+
// ---------------------------------------------------------------------------
35+
// Input type
36+
// ---------------------------------------------------------------------------
37+
38+
/**
39+
* Input arguments for the `check_grounding` tool.
40+
*/
41+
export interface CheckGroundingInput {
  /**
   * The text containing claims to verify against source documents.
   */
  text: string;

  /**
   * Source document texts to verify the claims against.
   * Each string is treated as a separate source chunk.
   */
  sources: string[];
}
53+
54+
// ---------------------------------------------------------------------------
55+
// CheckGroundingTool
56+
// ---------------------------------------------------------------------------
57+
58+
/**
59+
* Tool that verifies whether claims in text are supported by source documents
60+
* using NLI entailment scoring.
61+
*
62+
* Exposed as `check_grounding` in the tool registry so that agents can
63+
* explicitly verify grounding on demand, without relying solely on the
64+
* automatic guardrail pipeline.
65+
*
66+
* @example
67+
* ```typescript
68+
* const tool = new CheckGroundingTool(checker, extractor);
69+
* const result = await tool.execute({
70+
* text: 'The capital of France is Paris.',
71+
* sources: ['Paris is the capital city of France.'],
72+
* }, context);
73+
*
74+
* if (result.success && result.output?.grounded) {
75+
* console.log('All claims are grounded!');
76+
* }
77+
* ```
78+
*/
79+
export class CheckGroundingTool implements ITool<CheckGroundingInput, GroundingResult> {
  /**
   * Largest share of unverifiable claims (0..1) that still counts as grounded
   * when the constructor is not given an explicit `maxUnverifiableRatio`.
   */
  static readonly DEFAULT_MAX_UNVERIFIABLE_RATIO = 0.5;

  // -------------------------------------------------------------------------
  // ITool metadata
  // -------------------------------------------------------------------------

  /** Unique tool identifier. */
  readonly id = 'check_grounding';

  /** Functional name used in LLM tool-call requests. */
  readonly name = 'check_grounding';

  /** Human-readable display name for UIs and logs. */
  readonly displayName = 'Grounding Checker';

  /**
   * Detailed description for LLM tool selection.
   * Explains what the tool does, when to use it, and what it returns.
   */
  readonly description =
    'Verify that claims in text are supported by source documents using NLI entailment. ' +
    'Returns a grounding result with per-claim verdicts (supported, contradicted, unverifiable) ' +
    'and aggregate statistics. Use this to fact-check agent output against retrieved sources.';

  /** Tool category for registry filtering. */
  readonly category = 'security';

  /** Semantic version of this tool implementation. */
  readonly version = '1.0.0';

  /**
   * This tool has no side effects — it only reads and analyses text.
   * Safe to call without user confirmation.
   */
  readonly hasSideEffects = false;

  /**
   * JSON Schema defining the expected input arguments.
   * Used by the LLM to construct valid tool-call payloads and by the
   * ToolExecutor for input validation.
   */
  readonly inputSchema: JSONSchemaObject = {
    type: 'object',
    properties: {
      text: {
        type: 'string',
        description: 'Text containing claims to verify against source documents.',
      },
      sources: {
        type: 'array',
        items: { type: 'string' },
        description: 'Source document texts to verify claims against. Each string is a separate source.',
      },
    },
    required: ['text', 'sources'],
  };

  // -------------------------------------------------------------------------
  // Private dependencies
  // -------------------------------------------------------------------------

  /** Grounding checker used to verify the extracted claims. */
  private readonly checker: GroundingChecker;

  /** Claim extractor used to split the input text into claims. */
  private readonly extractor: ClaimExtractor;

  /**
   * Upper bound (inclusive) on `unverifiableCount / totalClaims` for a result
   * to be reported as grounded.
   */
  private readonly maxUnverifiableRatio: number;

  // -------------------------------------------------------------------------
  // Constructor
  // -------------------------------------------------------------------------

  /**
   * Construct a new CheckGroundingTool.
   *
   * @param checker - {@link GroundingChecker} instance used for verification.
   * @param extractor - {@link ClaimExtractor} instance used for claim extraction.
   * @param options - Optional tuning. `maxUnverifiableRatio` (0..1, inclusive)
   *   overrides {@link CheckGroundingTool.DEFAULT_MAX_UNVERIFIABLE_RATIO}.
   * @throws RangeError if `maxUnverifiableRatio` is not a finite number in [0, 1].
   */
  constructor(
    checker: GroundingChecker,
    extractor: ClaimExtractor,
    options: { maxUnverifiableRatio?: number } = {},
  ) {
    const ratio =
      options.maxUnverifiableRatio ?? CheckGroundingTool.DEFAULT_MAX_UNVERIFIABLE_RATIO;

    // Reject NaN / out-of-range values up front: a NaN threshold would make
    // every comparison false and silently mark every result as ungrounded.
    if (typeof ratio !== 'number' || !Number.isFinite(ratio) || ratio < 0 || ratio > 1) {
      throw new RangeError(
        `maxUnverifiableRatio must be a finite number between 0 and 1; received ${String(ratio)}.`,
      );
    }

    this.checker = checker;
    this.extractor = extractor;
    this.maxUnverifiableRatio = ratio;
  }

  // -------------------------------------------------------------------------
  // ITool — execute
  // -------------------------------------------------------------------------

  /**
   * Execute the grounding check.
   *
   * 1. Validates `text` and `sources` at runtime.
   * 2. Wraps each non-blank source string as a synthetic
   *    {@link RagRetrievedChunk} with `relevanceScore: 1.0`.
   * 3. Extracts claims from `text` via the {@link ClaimExtractor}.
   * 4. Verifies all claims via {@link GroundingChecker.checkClaims}.
   * 5. Aggregates results into a {@link GroundingResult}.
   *
   * Failures (invalid input, extractor or checker errors) are reported as
   * `{ success: false, error }` rather than thrown.
   *
   * @param args - Input arguments containing `text` and `sources`.
   * @param context - Tool execution context (unused but required by ITool).
   * @returns A {@link ToolExecutionResult} wrapping the {@link GroundingResult}.
   */
  async execute(
    args: CheckGroundingInput,
    context: ToolExecutionContext,
  ): Promise<ToolExecutionResult<GroundingResult>> {
    // The payload is assembled by an LLM, so the static types are not trusted:
    // read the fields as `unknown` and narrow them before use.
    const text: unknown = args?.text;
    const sources: unknown = args?.sources;

    if (typeof text !== 'string' || text.trim().length === 0) {
      return {
        success: false,
        error: 'Input text is empty or missing.',
      };
    }

    if (!Array.isArray(sources) || sources.length === 0) {
      return {
        success: false,
        error: 'No source documents provided. At least one source is required.',
      };
    }

    // Step 1: Wrap string sources as synthetic RagRetrievedChunk objects.
    // Blank or non-string entries carry no evidence and are skipped; the
    // synthetic IDs keep the caller's original index so verdicts can still be
    // traced back to the position in `sources`.
    const ragChunks: RagRetrievedChunk[] = [];
    sources.forEach((content: unknown, index: number) => {
      if (typeof content === 'string' && content.trim().length > 0) {
        ragChunks.push({
          id: `synthetic-source-${index}`,
          content,
          originalDocumentId: `synthetic-doc-${index}`,
          // Maximum relevance: the caller explicitly selected these sources.
          relevanceScore: 1.0,
        });
      }
    });

    if (ragChunks.length === 0) {
      return {
        success: false,
        error: 'No source documents provided. At least one source is required.',
      };
    }

    // Step 2: Extract claims from the input text.
    let claims;
    try {
      claims = await this.extractor.extract(text);
    } catch (err) {
      return {
        success: false,
        error: `Claim extraction failed: ${err instanceof Error ? err.message : String(err)}`,
      };
    }

    // No claims means there is nothing that could be ungrounded.
    if (claims.length === 0) {
      const emptyResult: GroundingResult = {
        grounded: true,
        claims: [],
        totalClaims: 0,
        supportedCount: 0,
        contradictedCount: 0,
        unverifiableCount: 0,
        unverifiableRatio: 0,
        summary: '0 claims extracted — nothing to verify.',
      };

      return {
        success: true,
        output: emptyResult,
      };
    }

    // Step 3: Verify all claims against the synthetic RAG sources.
    let verifications: ClaimVerification[];
    try {
      verifications = await this.checker.checkClaims(claims, ragChunks);
    } catch (err) {
      return {
        success: false,
        error: `Claim verification failed: ${err instanceof Error ? err.message : String(err)}`,
      };
    }

    // Step 4: Aggregate results into a GroundingResult.
    return {
      success: true,
      output: this.aggregateResults(verifications),
    };
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /**
   * Aggregate per-claim verification results into a {@link GroundingResult}.
   *
   * Counts each verdict type, computes the unverifiable ratio, and sets
   * `grounded` to true only when nothing is contradicted and the ratio does
   * not exceed the configured `maxUnverifiableRatio`.
   *
   * @param verifications - Per-claim verification results.
   * @returns Aggregated grounding result.
   */
  private aggregateResults(verifications: ClaimVerification[]): GroundingResult {
    const totalClaims = verifications.length;

    let supportedCount = 0;
    let contradictedCount = 0;
    let unverifiableCount = 0;

    for (const v of verifications) {
      switch (v.verdict) {
        case 'supported':
          supportedCount++;
          break;
        case 'contradicted':
          contradictedCount++;
          break;
        case 'unverifiable':
          unverifiableCount++;
          break;
        default:
          // NOTE(review): the verdict type is declared in ../types and is not
          // visible here. Any value outside the three known verdicts is
          // counted as unverifiable so that the counts always sum to
          // totalClaims and an unknown verdict can never pass as support.
          unverifiableCount++;
          break;
      }
    }

    // Compute ratio (guard against division by zero).
    const unverifiableRatio = totalClaims > 0 ? unverifiableCount / totalClaims : 0;

    // Grounded = no contradictions AND unverifiable ratio within tolerance.
    const grounded =
      contradictedCount === 0 && unverifiableRatio <= this.maxUnverifiableRatio;

    // Build human-readable summary.
    const summary = [
      `${supportedCount}/${totalClaims} claims supported`,
      `${contradictedCount} contradicted`,
      `${unverifiableCount} unverifiable (ratio ${unverifiableRatio.toFixed(2)})`,
    ].join(', ');

    return {
      grounded,
      claims: verifications,
      totalClaims,
      supportedCount,
      contradictedCount,
      unverifiableCount,
      unverifiableRatio,
      summary,
    };
  }
}

0 commit comments

Comments
 (0)