Skip to content

Commit 461a8cf

Browse files
committed
fix(emergent): pre-parse syntax validation + actionable hints + classifier category
Forge attempts that produced ill-formed JavaScript (e.g. an LLM emitting `() => const x = 1` or sneaking in TypeScript `interface` / `: type` annotations) previously failed with an opaque 'Correctness FAIL: all test cases did not run' message because parsing failed before any test input touched the sandbox. Three fixes: 1. SandboxedToolForge.execute now parses the wrapped source via AsyncFunction before runInContext. SyntaxErrors short-circuit with a 'SyntaxError before execution' result plus a hint that maps common V8 error shapes (Unexpected token const/let/interface/:, Unexpected end of input, Unexpected reserved word) to concrete rewrite guidance. 2. ForgeToolMetaTool.sandbox-code description now spells out: pure JS (no TypeScript), block braces on every if/for/while, arrow fns with declarations use { ... return }, and outputs must match the declared schema without extra fields. 3. ForgeRejectionClassifier gets a new `syntax_error` category, evaluated before parse_error, so dashboards / retry loops can see syntax failures as a first-class bucket instead of bleeding into 'judge_correctness' or 'other'. Targeted suites: sandboxed-forge (24 → 27) + forge-rejection-classifier (13 → 17), full emergent suite green (290/290).
1 parent b6d83de commit 461a8cf

5 files changed

Lines changed: 170 additions & 1 deletion

File tree

src/emergent/ForgeRejectionClassifier.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,15 @@ export type ForgeRejectionCategory =
4040
* non-deterministic behavior).
4141
*/
4242
| 'judge_correctness'
43+
/**
44+
* Implementation could not be parsed as JavaScript — typically an
45+
* LLM forge emitting TypeScript syntax (`interface`, `: type`),
46+
* declarations in the wrong position (`() => const x = ...`), or
47+
* single-line `if`/`for`/`while` without braces. Caught by the
48+
* sandbox pre-parse step or by a failing `execute` call that could
49+
* not even instantiate the function.
50+
*/
51+
| 'syntax_error'
4352
/**
4453
* Everything else. A non-zero `other` bucket is a signal to inspect
4554
* the raw reasons and consider adding a new category.
@@ -95,6 +104,17 @@ const PARSE_ERROR_PATTERNS = [
95104
'judge response was not valid json',
96105
];
97106

107+
const SYNTAX_ERROR_PATTERNS = [
108+
'syntaxerror',
109+
'syntax error',
110+
'unexpected token',
111+
'unexpected identifier',
112+
'unexpected end of input',
113+
'unexpected reserved word',
114+
'did not run successfully due to a syntax',
115+
'syntaxerror before execution',
116+
];
117+
98118
const JUDGE_CORRECTNESS_PATTERNS = [
99119
'logic error',
100120
'threshold ordering',
@@ -147,6 +167,9 @@ export function classifyForgeRejection(errorReason: string | undefined): ForgeRe
147167
for (const p of SHAPE_CHECK_PATTERNS) {
148168
if (lower.includes(p)) return 'shape_check';
149169
}
170+
for (const p of SYNTAX_ERROR_PATTERNS) {
171+
if (lower.includes(p)) return 'syntax_error';
172+
}
150173
for (const p of PARSE_ERROR_PATTERNS) {
151174
if (lower.includes(p)) return 'parse_error';
152175
}

src/emergent/ForgeToolMetaTool.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,20 @@ export class ForgeToolMetaTool implements ITool<ForgeToolInput, ForgeResult> {
165165
type: 'object',
166166
properties: {
167167
mode: { type: 'string', const: 'sandbox' },
168-
code: { type: 'string' },
168+
code: {
169+
type: 'string',
170+
description:
171+
'Pure JavaScript (ES2020). MUST define `function execute(input) { ... }` ' +
172+
'(or an async equivalent) at the top level. RULES that block the judge from ' +
173+
'approving otherwise-correct tools: ' +
174+
'(1) NO TypeScript: no `: type` annotations, no `interface`, no `type` aliases, no `<T>` generics, no `enum`. ' +
175+
'(2) Every `if`/`for`/`while`/`else` body uses BLOCK BRACES `{ ... }` — ' +
176+
'single-line bodies with declarations are forbidden. ' +
177+
'(3) Arrow functions that declare variables must use `(...) => { const x = ...; return x; }` — ' +
178+
'never `(...) => const x = ...` (a SyntaxError). ' +
179+
'(4) Output MUST conform exactly to `outputSchema` — no extra fields. ' +
180+
'If the schema has `additionalProperties: false`, return ONLY the declared keys.',
181+
},
169182
allowlist: {
170183
type: 'array',
171184
items: { type: 'string', enum: ['fetch', 'fs.readFile', 'crypto'] },

src/emergent/SandboxedToolForge.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,46 @@ export class SandboxedToolForge {
189189
* // result.violations === ['eval() is forbidden']
190190
* ```
191191
*/
192+
/**
193+
* Translate a raw V8 SyntaxError message into a concrete hint that
194+
* points the LLM (or the retry loop) at the likely cause. Returns
195+
* empty string when no known pattern matches — callers still surface
196+
* the raw message in that case. The hints map to the most common
197+
* LLM forge mistakes observed in production.
198+
*/
199+
private describeSyntaxError(message: string): string {
200+
const m = message || '';
201+
if (/Unexpected token 'const'/.test(m) || /Unexpected token 'let'/.test(m)) {
202+
return (
203+
'A `const` or `let` appeared in a position JavaScript does not allow. ' +
204+
'Common causes: arrow function without braces (`() => const x = 1` — wrap in `{}` and add `return`), ' +
205+
'`if (x) const y = 1` without a block, or a declaration used as an expression. ' +
206+
'Every `if`/`for`/`while` body must use block braces; every arrow fn that declares variables must use `{ ... }` and `return`.'
207+
);
208+
}
209+
if (/Unexpected token '?:'?/.test(m)) {
210+
return (
211+
'Unexpected `:` — most likely a TypeScript type annotation leaked into the output. ' +
212+
'Output must be pure JavaScript. Remove all `: type` annotations, `interface` blocks, and generic brackets `<T>`.'
213+
);
214+
}
215+
if (/Unexpected reserved word/.test(m) && /interface|type/.test(m)) {
216+
return (
217+
'TypeScript-only keyword leaked into the output. Remove `interface`, `type`, `enum`, and `implements` — emit pure JavaScript only.'
218+
);
219+
}
220+
if (/Unexpected identifier/.test(m)) {
221+
return (
222+
'Two identifiers appeared adjacent without a binding keyword. ' +
223+
'Commonly caused by missing commas in objects or missing semicolons between statements.'
224+
);
225+
}
226+
if (/Unexpected end of input/.test(m)) {
227+
return 'Code is missing a closing `}`, `)`, or `]` somewhere. Check all brace/paren pairs.';
228+
}
229+
return '';
230+
}
231+
192232
validateCode(code: string, allowlist: SandboxAPI[]): { valid: boolean; violations: string[] } {
193233
const violations: string[] = [];
194234

@@ -295,6 +335,34 @@ export class SandboxedToolForge {
295335
})();
296336
`;
297337

338+
// Step 3.5: Pre-parse to isolate syntax errors with line-level hints
339+
// BEFORE running test cases. Without this, LLM-generated code that
340+
// has a `SyntaxError: Unexpected token 'const'` (common when the LLM
341+
// writes `() => const x = 1` or sneaks in TypeScript types) would
342+
// cause every test case to fail with the same opaque message, and
343+
// the judge's retry prompt wouldn't have enough context to fix it.
344+
// Constructing an AsyncFunction is the cheapest parse-only check that
345+
// covers the exact same async-IIFE shape we run in the VM — so any
346+
// syntax issue here is guaranteed to also fail in runInContext.
347+
try {
348+
// The probe re-wraps request.code in an async IIFE, which is an
349+
// expression rather than statements, so it is returned from a function
350+
// body. `new AsyncFunction` parses + instantiates without executing.
351+
const AsyncFunctionCtor = Object.getPrototypeOf(async function () {
352+
/* pre-parse probe */
353+
}).constructor as typeof Function;
354+
new AsyncFunctionCtor('return (async () => {' + request.code + '})();');
355+
} catch (err: unknown) {
356+
const msg = err instanceof Error ? err.message : String(err);
357+
const hint = this.describeSyntaxError(msg);
358+
return {
359+
success: false,
360+
error: `SyntaxError before execution: ${msg}${hint ? ` | Hint: ${hint}` : ''}`,
361+
executionTimeMs: Math.round(performance.now() - startTime),
362+
memoryUsedBytes: 0,
363+
};
364+
}
365+
298366
// Step 4: Execute in VM with timeout.
299367
try {
300368
const ctx = createContext(sandboxGlobals);

src/emergent/__tests__/forge-rejection-classifier.spec.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,28 @@ const CASES: Array<{ input: string; expected: ForgeRejectionCategory; label: str
7979
input: 'Sandbox timeout exceeded after 10000ms',
8080
expected: 'other',
8181
},
82+
{
83+
label: 'syntax error — "Unexpected token const" from production',
84+
input:
85+
"Correctness FAIL: all provided test cases did not run successfully due to a syntax error in the implementation: 'SyntaxError: Unexpected token \\'const\\''",
86+
expected: 'syntax_error',
87+
},
88+
{
89+
label: 'syntax error — pre-parse message',
90+
input: 'SyntaxError before execution: Unexpected identifier | Hint: missing commas in objects',
91+
expected: 'syntax_error',
92+
},
93+
{
94+
label: 'syntax error — unexpected end of input (missing brace)',
95+
input:
96+
'Correctness FAIL: SyntaxError: Unexpected end of input — implementation is missing a closing brace',
97+
expected: 'syntax_error',
98+
},
99+
{
100+
label: 'syntax error — reserved word (TypeScript leaked)',
101+
input: 'SyntaxError: Unexpected reserved word interface in implementation',
102+
expected: 'syntax_error',
103+
},
82104
];
83105

84106
describe('classifyForgeRejection', () => {

src/emergent/__tests__/sandboxed-forge.spec.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,4 +402,47 @@ describe('SandboxedToolForge', () => {
402402
expect(result.success).toBe(true);
403403
expect(result.output).toEqual({ total: 55, count: 3 });
404404
});
405+
406+
// -------------------------------------------------------------------------
407+
// Pre-parse: syntax errors surface with actionable hints (not generic
408+
// "test cases failed" opaqueness that the judge gets otherwise).
409+
// -------------------------------------------------------------------------
410+
it('pre-parse catches arrow-fn with const in expression position', async () => {
411+
const request = makeRequest(
412+
// Invalid: const in arrow body without braces
413+
'const calc = (x) => const doubled = x * 2;\nfunction execute(input) { return { out: calc(input.n) }; }',
414+
{ n: 5 },
415+
);
416+
417+
const result = await forge.execute(request);
418+
419+
expect(result.success).toBe(false);
420+
expect(result.error).toMatch(/SyntaxError before execution/);
421+
expect(result.error).toMatch(/arrow function without braces|wrap in `{}`/i);
422+
});
423+
424+
it('pre-parse catches TypeScript syntax leaks', async () => {
425+
const request = makeRequest(
426+
// Invalid JS: interface keyword
427+
'interface Input { n: number }\nfunction execute(input) { return { n: input.n }; }',
428+
{ n: 1 },
429+
);
430+
431+
const result = await forge.execute(request);
432+
433+
expect(result.success).toBe(false);
434+
expect(result.error).toMatch(/SyntaxError before execution/);
435+
});
436+
437+
it('pre-parse passes valid ES2020 code through to execution', async () => {
438+
const request = makeRequest(
439+
'async function execute(input) { return { doubled: input.n * 2 }; }',
440+
{ n: 7 },
441+
);
442+
443+
const result = await forge.execute(request);
444+
445+
expect(result.success).toBe(true);
446+
expect(result.output).toEqual({ doubled: 14 });
447+
});
405448
});

0 commit comments

Comments
 (0)