Skip to content

Commit d831b04

Browse files
authored
feat(browser): advanced DOM snapshot engine with 13-layer pruning pipeline (#210)
Core Changes:
- New dom-snapshot.ts: 13-layer LLM-optimized DOM pruning engine
- Tag filtering, SVG collapse, ad/noise detection
- CSS visibility, viewport threshold, paint-order occlusion
- Shadow DOM traversal, same-origin iframe extraction
- BBox parent-child dedup, attribute whitelist + synthetic attrs
- Table → markdown serialization
- Incremental diff (mark new elements with *)
- data-opencli-ref annotation for precise click/type targeting
- Hidden interactive element hints (scroll-to-reveal)

New APIs:
- IPage.scrollTo(ref) — scroll to snapshot-identified elements
- IPage.getFormState() — extract all form fields as structured JSON
- scrollToRefJs(), getFormStateJs() — standalone JS generators

Integration:
- Page (daemon) + CDPPage (direct CDP): use new engine as primary
- dom-helpers click/type: 4-layer fallback (data-opencli-ref → data-ref → CSS → index)
- Exports from browser/index.ts barrel

Testing:
- 21 new tests for dom-snapshot engine
- All 283 tests pass (29 files, 1.07s)
- Split test scripts: npm test (unit only), npm run test:all (full)
1 parent a228758 commit d831b04

File tree

11 files changed

+1133
-16
lines changed

11 files changed

+1133
-16
lines changed

package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@
2828
"typecheck": "tsc --noEmit",
2929
"lint": "tsc --noEmit",
3030
"prepublishOnly": "npm run build",
31-
"test": "vitest run",
32-
"test:site": "node scripts/test-site.mjs",
33-
"test:watch": "vitest",
31+
"test": "vitest run --project unit",
32+
"test:all": "vitest run",
33+
"test:e2e": "vitest run --project e2e",
3434
"docs:dev": "vitepress dev docs",
3535
"docs:build": "vitepress build docs",
3636
"docs:preview": "vitepress preview docs"

src/browser/cdp.ts

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import { WebSocket, type RawData } from 'ws';
1212
import type { IPage } from '../types.js';
1313
import { wrapForEval } from './utils.js';
14+
import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js';
1415
import {
1516
clickJs,
1617
typeTextJs,
@@ -193,9 +194,16 @@ class CDPPage implements IPage {
193194
: cookies;
194195
}
195196

196-
async snapshot(_opts?: any): Promise<any> {
197-
// CDP doesn't have a built-in accessibility tree equivalent without additional setup
198-
return '(snapshot not available in CDP mode)';
197+
async snapshot(opts: { interactive?: boolean; compact?: boolean; maxDepth?: number; raw?: boolean; viewportExpand?: number; maxTextLength?: number } = {}): Promise<any> {
198+
const snapshotJs = generateSnapshotJs({
199+
viewportExpand: opts.viewportExpand ?? 800,
200+
maxDepth: Math.max(1, Math.min(Number(opts.maxDepth) || 50, 200)),
201+
interactiveOnly: opts.interactive ?? false,
202+
maxTextLength: opts.maxTextLength ?? 120,
203+
includeScrollInfo: true,
204+
bboxDedup: true,
205+
});
206+
return this.evaluate(snapshotJs);
199207
}
200208

201209
// ── Shared DOM operations (P1 fix #5 — using dom-helpers.ts) ──
@@ -212,6 +220,14 @@ class CDPPage implements IPage {
212220
await this.evaluate(pressKeyJs(key));
213221
}
214222

223+
async scrollTo(ref: string): Promise<any> {
224+
return this.evaluate(scrollToRefJs(ref));
225+
}
226+
227+
async getFormState(): Promise<any> {
228+
return this.evaluate(getFormStateJs());
229+
}
230+
215231
async wait(options: any): Promise<void> {
216232
if (typeof options === 'number') {
217233
await new Promise(resolve => setTimeout(resolve, options * 1000));

src/browser/dom-helpers.ts

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,21 @@ export function clickJs(ref: string): string {
1111
return `
1212
(() => {
1313
const ref = ${safeRef};
14-
const el = document.querySelector('[data-ref="' + ref + '"]')
15-
|| document.querySelectorAll('a, button, input, [role="button"], [tabindex]')[parseInt(ref, 10) || 0];
14+
// 1. data-opencli-ref (set by snapshot engine)
15+
let el = document.querySelector('[data-opencli-ref="' + ref + '"]');
16+
// 2. data-ref (legacy)
17+
if (!el) el = document.querySelector('[data-ref="' + ref + '"]');
18+
// 3. CSS selector
19+
if (!el && ref.match(/^[a-zA-Z#.\\[]/)) {
20+
try { el = document.querySelector(ref); } catch {}
21+
}
22+
// 4. Numeric index into interactive elements
23+
if (!el) {
24+
const idx = parseInt(ref, 10);
25+
if (!isNaN(idx)) {
26+
el = document.querySelectorAll('a, button, input, select, textarea, [role="button"], [tabindex]:not([tabindex="-1"])')[idx];
27+
}
28+
}
1629
if (!el) throw new Error('Element not found: ' + ref);
1730
el.scrollIntoView({ behavior: 'instant', block: 'center' });
1831
el.click();
@@ -28,13 +41,31 @@ export function typeTextJs(ref: string, text: string): string {
2841
return `
2942
(() => {
3043
const ref = ${safeRef};
31-
const el = document.querySelector('[data-ref="' + ref + '"]')
32-
|| document.querySelectorAll('input, textarea, [contenteditable]')[parseInt(ref, 10) || 0];
44+
// 1. data-opencli-ref (set by snapshot engine)
45+
let el = document.querySelector('[data-opencli-ref="' + ref + '"]');
46+
// 2. data-ref (legacy)
47+
if (!el) el = document.querySelector('[data-ref="' + ref + '"]');
48+
// 3. CSS selector
49+
if (!el && ref.match(/^[a-zA-Z#.\\[]/)) {
50+
try { el = document.querySelector(ref); } catch {}
51+
}
52+
// 4. Numeric index into typeable elements
53+
if (!el) {
54+
const idx = parseInt(ref, 10);
55+
if (!isNaN(idx)) {
56+
el = document.querySelectorAll('input, textarea, [contenteditable="true"]')[idx];
57+
}
58+
}
3359
if (!el) throw new Error('Element not found: ' + ref);
3460
el.focus();
35-
el.value = ${safeText};
36-
el.dispatchEvent(new Event('input', { bubbles: true }));
37-
el.dispatchEvent(new Event('change', { bubbles: true }));
61+
if (el.isContentEditable) {
62+
el.textContent = ${safeText};
63+
el.dispatchEvent(new Event('input', { bubbles: true }));
64+
} else {
65+
el.value = ${safeText};
66+
el.dispatchEvent(new Event('input', { bubbles: true }));
67+
el.dispatchEvent(new Event('change', { bubbles: true }));
68+
}
3869
return 'typed';
3970
})()
4071
`;

src/browser/dom-snapshot.test.ts

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
/**
2+
* Tests for dom-snapshot.ts: DOM snapshot engine.
3+
*
4+
* Since the engine generates JavaScript strings for in-page evaluation,
5+
* these tests validate:
6+
* 1. The generated code is syntactically valid JS
7+
* 2. Options are correctly embedded
8+
* 3. The output structure matches expected format
9+
* 4. All features are present (Shadow DOM, iframe, table, diff, etc.)
10+
*/
11+
12+
import { describe, it, expect } from 'vitest';
13+
import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js';
14+
15+
describe('generateSnapshotJs', () => {
16+
it('returns a non-empty string', () => {
17+
const js = generateSnapshotJs();
18+
expect(typeof js).toBe('string');
19+
expect(js.length).toBeGreaterThan(100);
20+
});
21+
22+
it('generates syntactically valid JS (can be parsed)', () => {
23+
const js = generateSnapshotJs();
24+
expect(() => new Function(js)).not.toThrow();
25+
});
26+
27+
it('embeds default options correctly', () => {
28+
const js = generateSnapshotJs();
29+
expect(js).toContain('VIEWPORT_EXPAND = 800');
30+
expect(js).toContain('MAX_DEPTH = 50');
31+
expect(js).toContain('INTERACTIVE_ONLY = false');
32+
expect(js).toContain('MAX_TEXT_LEN = 120');
33+
expect(js).toContain('INCLUDE_SCROLL_INFO = true');
34+
expect(js).toContain('BBOX_DEDUP = true');
35+
expect(js).toContain('INCLUDE_SHADOW_DOM = true');
36+
expect(js).toContain('INCLUDE_IFRAMES = true');
37+
expect(js).toContain('PAINT_ORDER_CHECK = true');
38+
expect(js).toContain('ANNOTATE_REFS = true');
39+
expect(js).toContain('REPORT_HIDDEN = true');
40+
expect(js).toContain('FILTER_ADS = true');
41+
expect(js).toContain('MARKDOWN_TABLES = true');
42+
expect(js).toContain('PREV_HASHES = null');
43+
});
44+
45+
it('embeds custom options correctly', () => {
46+
const js = generateSnapshotJs({
47+
viewportExpand: 2000,
48+
maxDepth: 30,
49+
interactiveOnly: true,
50+
maxTextLength: 200,
51+
includeScrollInfo: false,
52+
bboxDedup: false,
53+
includeShadowDom: false,
54+
includeIframes: false,
55+
maxIframes: 3,
56+
paintOrderCheck: false,
57+
annotateRefs: false,
58+
reportHidden: false,
59+
filterAds: false,
60+
markdownTables: false,
61+
});
62+
expect(js).toContain('VIEWPORT_EXPAND = 2000');
63+
expect(js).toContain('MAX_DEPTH = 30');
64+
expect(js).toContain('INTERACTIVE_ONLY = true');
65+
expect(js).toContain('MAX_TEXT_LEN = 200');
66+
expect(js).toContain('INCLUDE_SCROLL_INFO = false');
67+
expect(js).toContain('BBOX_DEDUP = false');
68+
expect(js).toContain('INCLUDE_SHADOW_DOM = false');
69+
expect(js).toContain('INCLUDE_IFRAMES = false');
70+
expect(js).toContain('MAX_IFRAMES = 3');
71+
expect(js).toContain('PAINT_ORDER_CHECK = false');
72+
expect(js).toContain('ANNOTATE_REFS = false');
73+
expect(js).toContain('REPORT_HIDDEN = false');
74+
expect(js).toContain('FILTER_ADS = false');
75+
expect(js).toContain('MARKDOWN_TABLES = false');
76+
});
77+
78+
it('clamps maxDepth between 1 and 200', () => {
79+
expect(generateSnapshotJs({ maxDepth: -5 })).toContain('MAX_DEPTH = 1');
80+
expect(generateSnapshotJs({ maxDepth: 999 })).toContain('MAX_DEPTH = 200');
81+
expect(generateSnapshotJs({ maxDepth: 75 })).toContain('MAX_DEPTH = 75');
82+
});
83+
84+
it('wraps output as an IIFE', () => {
85+
const js = generateSnapshotJs();
86+
expect(js.startsWith('(() =>')).toBe(true);
87+
expect(js.trimEnd().endsWith(')()')).toBe(true);
88+
});
89+
90+
it('embeds previousHashes for incremental diff', () => {
91+
const hashes = JSON.stringify(['12345', '67890']);
92+
const js = generateSnapshotJs({ previousHashes: hashes });
93+
expect(js).toContain('new Set(["12345","67890"])');
94+
});
95+
96+
it('includes all core features in generated code', () => {
97+
const js = generateSnapshotJs();
98+
99+
// Tag filtering
100+
expect(js).toContain('SKIP_TAGS');
101+
expect(js).toContain("'script'");
102+
expect(js).toContain("'style'");
103+
104+
// SVG collapsing
105+
expect(js).toContain('SVG_CHILDREN');
106+
107+
// Interactive detection
108+
expect(js).toContain('INTERACTIVE_TAGS');
109+
expect(js).toContain('INTERACTIVE_ROLES');
110+
expect(js).toContain('isInteractive');
111+
112+
// Visibility
113+
expect(js).toContain('isVisibleByCSS');
114+
expect(js).toContain('isInExpandedViewport');
115+
116+
// BBox dedup
117+
expect(js).toContain('isContainedBy');
118+
expect(js).toContain('PROPAGATING_TAGS');
119+
120+
// Shadow DOM
121+
expect(js).toContain('shadowRoot');
122+
expect(js).toContain('|shadow|');
123+
124+
// iframe
125+
expect(js).toContain('walkIframe');
126+
expect(js).toContain('|iframe|');
127+
128+
// Paint order
129+
expect(js).toContain('isOccludedByOverlay');
130+
expect(js).toContain('elementFromPoint');
131+
132+
// Ad filtering
133+
expect(js).toContain('isAdElement');
134+
expect(js).toContain('AD_PATTERNS');
135+
136+
// data-ref annotation
137+
expect(js).toContain('data-opencli-ref');
138+
139+
// Hidden elements report
140+
expect(js).toContain('hiddenInteractives');
141+
expect(js).toContain('hidden_interactive');
142+
143+
// Incremental diff
144+
expect(js).toContain('hashElement');
145+
expect(js).toContain('currentHashes');
146+
expect(js).toContain('__opencli_prev_hashes');
147+
148+
// Table serialization
149+
expect(js).toContain('serializeTable');
150+
expect(js).toContain('|table|');
151+
152+
// Synthetic attributes
153+
expect(js).toContain("'YYYY-MM-DD'");
154+
expect(js).toContain('value=••••');
155+
156+
// Page metadata
157+
expect(js).toContain('location.href');
158+
expect(js).toContain('document.title');
159+
});
160+
161+
it('contains proper attribute whitelist', () => {
162+
const js = generateSnapshotJs();
163+
const expectedAttrs = [
164+
'aria-label', 'aria-expanded', 'aria-checked', 'aria-selected',
165+
'placeholder', 'href', 'role', 'data-testid', 'autocomplete',
166+
];
167+
for (const attr of expectedAttrs) {
168+
expect(js).toContain(`'${attr}'`);
169+
}
170+
});
171+
172+
it('includes scroll info formatting', () => {
173+
const js = generateSnapshotJs();
174+
expect(js).toContain('scrollHeight');
175+
expect(js).toContain('scrollTop');
176+
expect(js).toContain('|scroll|');
177+
expect(js).toContain('page_scroll');
178+
});
179+
});
180+
181+
describe('scrollToRefJs', () => {
182+
it('generates valid JS', () => {
183+
const js = scrollToRefJs('42');
184+
expect(() => new Function(js)).not.toThrow();
185+
});
186+
187+
it('targets data-opencli-ref', () => {
188+
const js = scrollToRefJs('7');
189+
expect(js).toContain('data-opencli-ref');
190+
expect(js).toContain('scrollIntoView');
191+
expect(js).toContain('"7"');
192+
});
193+
194+
it('falls back to data-ref', () => {
195+
const js = scrollToRefJs('3');
196+
expect(js).toContain('data-ref');
197+
});
198+
199+
it('returns scrolled info', () => {
200+
const js = scrollToRefJs('1');
201+
expect(js).toContain('scrolled: true');
202+
expect(js).toContain('tag:');
203+
});
204+
});
205+
206+
describe('getFormStateJs', () => {
207+
it('generates valid JS', () => {
208+
const js = getFormStateJs();
209+
expect(() => new Function(js)).not.toThrow();
210+
});
211+
212+
it('collects form elements', () => {
213+
const js = getFormStateJs();
214+
expect(js).toContain('document.forms');
215+
expect(js).toContain('form.elements');
216+
});
217+
218+
it('collects orphan fields', () => {
219+
const js = getFormStateJs();
220+
expect(js).toContain('orphanFields');
221+
expect(js).toContain('el.form');
222+
});
223+
224+
it('handles different input types', () => {
225+
const js = getFormStateJs();
226+
expect(js).toContain('checkbox');
227+
expect(js).toContain('radio');
228+
expect(js).toContain('password');
229+
expect(js).toContain('contenteditable');
230+
});
231+
232+
it('extracts labels', () => {
233+
const js = getFormStateJs();
234+
expect(js).toContain('aria-label');
235+
expect(js).toContain('label[for=');
236+
expect(js).toContain('closest');
237+
expect(js).toContain('placeholder');
238+
});
239+
240+
it('masks passwords', () => {
241+
const js = getFormStateJs();
242+
expect(js).toContain('••••');
243+
});
244+
245+
it('includes data-opencli-ref in output', () => {
246+
const js = getFormStateJs();
247+
expect(js).toContain('data-opencli-ref');
248+
});
249+
});

0 commit comments

Comments
 (0)