From c35142811960bb177c5119d77fedf7d3d47e6cc7 Mon Sep 17 00:00:00 2001
From: konard
Date: Sat, 25 Oct 2025 10:11:05 +0200
Subject: [PATCH 1/3] Initial commit with task details for issue #11

Adding CLAUDE.md with task information for AI processing.
This file will be removed when the task is complete.

Issue: undefined
---
 CLAUDE.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 CLAUDE.md

diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..cf5d57e
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,5 @@
+Issue to solve: undefined
+Your prepared branch: issue-11-2861fe92
+Your prepared working directory: /tmp/gh-issue-solver-1761379858529
+
+Proceed.
\ No newline at end of file

From dec97091cd5e0bd0cb91009b0900eedf66a7d097 Mon Sep 17 00:00:00 2001
From: konard
Date: Sat, 25 Oct 2025 10:22:04 +0200
Subject: [PATCH 2/3] feat: add integration tests for StackOverflow page download with markdown and image support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit implements comprehensive integration tests to verify that both Puppeteer and Playwright engines can:
- Download the StackOverflow page at https://stackoverflow.com/questions/927358/how-do-i-undo-the-most-recent-local-commits-in-git
- Convert the page HTML to markdown format
- Capture screenshots of the page as PNG images

Changes:
- Add StackOverflow download tests for both Puppeteer and Playwright engines in tests/integration/browser-engines.test.js
- Update jest.config.mjs to include integration tests in testMatch pattern
- Fix Playwright adapter's setUserAgent implementation to use route interception (Playwright doesn't have page.setUserAgent())
- Increase timeout for StackOverflow tests to 60000ms for navigation and 90000ms for test completion to handle slower page loads

All tests pass successfully, confirming that both browser engines work correctly for downloading and processing complex real-world pages.
Fixes #11 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- jest.config.mjs | 3 +- src/browser.js | 15 +++- tests/integration/browser-engines.test.js | 99 +++++++++++++++++++++++ 3 files changed, 115 insertions(+), 2 deletions(-) diff --git a/jest.config.mjs b/jest.config.mjs index 557dffd..137d226 100644 --- a/jest.config.mjs +++ b/jest.config.mjs @@ -13,7 +13,8 @@ export default { testMatch: [ '**/tests/unit/**/*.test.js', '**/tests/mock/**/*.test.js', - '**/tests/e2e/**/*.test.js' + '**/tests/e2e/**/*.test.js', + '**/tests/integration/**/*.test.js' ], setupFiles: ['./tests/jest.setup.mjs'] }; \ No newline at end of file diff --git a/src/browser.js b/src/browser.js index 3fff7d1..6b6c447 100644 --- a/src/browser.js +++ b/src/browser.js @@ -130,12 +130,25 @@ function createPuppeteerPageAdapter(page) { * @returns {PageAdapter} */ function createPlaywrightPageAdapter(page) { + // Store user agent to apply before navigation + let userAgentToSet = null; + return { async setExtraHTTPHeaders(headers) { await page.setExtraHTTPHeaders(headers); }, async setUserAgent(userAgent) { - await page.setUserAgent(userAgent); + // Playwright doesn't have page.setUserAgent(), + // so we store it and use context.addInitScript or route + userAgentToSet = userAgent; + // Set user agent using route to intercept and modify + await page.route('**/*', (route) => { + const headers = route.request().headers(); + if (userAgentToSet) { + headers['user-agent'] = userAgentToSet; + } + route.continue({ headers }); + }); }, async setViewport(viewport) { // Playwright uses setViewportSize instead of setViewport diff --git a/tests/integration/browser-engines.test.js b/tests/integration/browser-engines.test.js index f9f2b20..45dba67 100644 --- a/tests/integration/browser-engines.test.js +++ b/tests/integration/browser-engines.test.js @@ -1,4 +1,5 @@ import { createBrowser } from '../../src/browser.js'; +import { convertHtmlToMarkdown } from '../../src/lib.js'; 
describe('Browser Engine Integration Tests', () => { describe('Puppeteer Engine', () => { @@ -101,4 +102,102 @@ describe('Browser Engine Integration Tests', () => { await playwrightBrowser.close(); }, 60000); }); + + describe('StackOverflow Page Download', () => { + const stackOverflowUrl = 'https://stackoverflow.com/questions/927358/how-do-i-undo-the-most-recent-local-commits-in-git'; + + describe('Puppeteer Engine', () => { + let browser; + + beforeEach(async () => { + browser = await createBrowser('puppeteer'); + }); + + afterEach(async () => { + if (browser) { + await browser.close(); + } + }); + + it('can download StackOverflow page and convert to markdown', async () => { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(stackOverflowUrl, { waitUntil: 'networkidle0', timeout: 60000 }); + + const html = await page.content(); + expect(html).toBeTruthy(); + expect(html.length).toBeGreaterThan(1000); + + // Convert HTML to markdown + const markdown = convertHtmlToMarkdown(html, stackOverflowUrl); + expect(markdown).toBeTruthy(); + expect(markdown.length).toBeGreaterThan(100); + + // Verify markdown contains expected content + expect(markdown).toContain('How do I undo the most recent local commits in Git'); + expect(markdown).toContain('git'); + }, 90000); + + it('can take a screenshot of StackOverflow page', async () => { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(stackOverflowUrl, { waitUntil: 'networkidle0', timeout: 60000 }); + + const screenshot = await page.screenshot({ type: 'png' }); + expect(screenshot).toBeInstanceOf(Buffer); + expect(screenshot.length).toBeGreaterThan(1000); + + // Verify it's a valid PNG + const pngSignature = Buffer.from([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]); + expect(screenshot.slice(0, 8).equals(pngSignature)).toBe(true); + }, 90000); + }); + + describe('Playwright Engine', () => { + let 
browser; + + beforeEach(async () => { + browser = await createBrowser('playwright'); + }); + + afterEach(async () => { + if (browser) { + await browser.close(); + } + }); + + it('can download StackOverflow page and convert to markdown', async () => { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(stackOverflowUrl, { waitUntil: 'networkidle0', timeout: 60000 }); + + const html = await page.content(); + expect(html).toBeTruthy(); + expect(html.length).toBeGreaterThan(1000); + + // Convert HTML to markdown + const markdown = convertHtmlToMarkdown(html, stackOverflowUrl); + expect(markdown).toBeTruthy(); + expect(markdown.length).toBeGreaterThan(100); + + // Verify markdown contains expected content + expect(markdown).toContain('How do I undo the most recent local commits in Git'); + expect(markdown).toContain('git'); + }, 90000); + + it('can take a screenshot of StackOverflow page', async () => { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(stackOverflowUrl, { waitUntil: 'networkidle0', timeout: 60000 }); + + const screenshot = await page.screenshot({ type: 'png' }); + expect(screenshot).toBeInstanceOf(Buffer); + expect(screenshot.length).toBeGreaterThan(1000); + + // Verify it's a valid PNG + const pngSignature = Buffer.from([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]); + expect(screenshot.slice(0, 8).equals(pngSignature)).toBe(true); + }, 90000); + }); + }); }); From 20163c7661df7204f673de261822005ff2b14d4c Mon Sep 17 00:00:00 2001 From: konard Date: Sat, 25 Oct 2025 10:23:18 +0200 Subject: [PATCH 3/3] Revert "Initial commit with task details for issue #11" This reverts commit c35142811960bb177c5119d77fedf7d3d47e6cc7. 
---
 CLAUDE.md | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 CLAUDE.md

diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index cf5d57e..0000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Issue to solve: undefined
-Your prepared branch: issue-11-2861fe92
-Your prepared working directory: /tmp/gh-issue-solver-1761379858529
-
-Proceed.
\ No newline at end of file