From 3657e47e31a337bd088c5ec571c59cf3ffafc940 Mon Sep 17 00:00:00 2001 From: konard Date: Sat, 25 Oct 2025 10:06:52 +0200 Subject: [PATCH 1/3] Initial commit with task details for issue #8 Adding CLAUDE.md with task information for AI processing. This file will be removed when the task is complete. Issue: undefined --- CLAUDE.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..e035495 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,5 @@ +Issue to solve: undefined +Your prepared branch: issue-8-2bf69acf +Your prepared working directory: /tmp/gh-issue-solver-1761379609843 + +Proceed. \ No newline at end of file From 18593a866882318dc65aa232670f5a6a6fedc202 Mon Sep 17 00:00:00 2001 From: konard Date: Sat, 25 Oct 2025 10:20:17 +0200 Subject: [PATCH 2/3] feat: add Wikipedia page download test for all supported engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive integration tests that verify the ability to download Wikipedia pages (https://en.wikipedia.org/wiki/Wikipedia) in all three formats (HTML, Markdown, and screenshot) using both Puppeteer and Playwright browser engines. Test coverage includes: - Downloading Wikipedia page as HTML (both engines) - Converting Wikipedia page to Markdown (both engines) - Capturing Wikipedia page as PNG screenshot (both engines) - Comparing content download across both engines Also updated jest.config.mjs to include integration tests in the test suite. Fixes #8 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- jest.config.mjs | 1 + tests/integration/wikipedia-download.test.js | 175 +++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 tests/integration/wikipedia-download.test.js diff --git a/jest.config.mjs b/jest.config.mjs index 557dffd..cd414ef 100644 --- a/jest.config.mjs +++ b/jest.config.mjs @@ -13,6 +13,7 @@ export default { testMatch: [ '**/tests/unit/**/*.test.js', '**/tests/mock/**/*.test.js', + '**/tests/integration/**/*.test.js', '**/tests/e2e/**/*.test.js' ], setupFiles: ['./tests/jest.setup.mjs'] diff --git a/tests/integration/wikipedia-download.test.js b/tests/integration/wikipedia-download.test.js new file mode 100644 index 0000000..b910ae8 --- /dev/null +++ b/tests/integration/wikipedia-download.test.js @@ -0,0 +1,175 @@ +import { createBrowser } from '../../src/browser.js'; +import { convertHtmlToMarkdown } from '../../src/lib.js'; + +const WIKIPEDIA_URL = 'https://en.wikipedia.org/wiki/Wikipedia'; + +describe('Wikipedia Page Download Tests', () => { + describe('Puppeteer Engine', () => { + let browser; + + beforeEach(async () => { + browser = await createBrowser('puppeteer'); + }); + + afterEach(async () => { + if (browser) { + await browser.close(); + } + }); + + it('can download Wikipedia page as HTML', async () => { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(WIKIPEDIA_URL, { waitUntil: 'networkidle0', timeout: 60000 }); + + // Wait for page to fully load + await new Promise(resolve => setTimeout(resolve, 5000)); + + const html = await page.content(); + + // Verify HTML contains expected Wikipedia content + expect(html).toContain('Wikipedia'); + expect(html).toMatch(/ { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(WIKIPEDIA_URL, { waitUntil: 'networkidle0', timeout: 60000 }); + + // Wait for page to fully load + await new Promise(resolve => setTimeout(resolve, 5000)); + + const html = await page.content(); + const markdown = convertHtmlToMarkdown(html, WIKIPEDIA_URL); + + // Verify Markdown contains expected Wikipedia content + expect(markdown).toContain('Wikipedia'); + expect(markdown.length).toBeGreaterThan(500); + // Should not contain main HTML structure tags + expect(markdown).not.toMatch(/ { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(WIKIPEDIA_URL, { waitUntil: 'networkidle0', timeout: 60000 }); + + // Wait for page to fully load + await new Promise(resolve => setTimeout(resolve, 5000)); + + const screenshot = await page.screenshot({ type: 'png' }); + + // Verify screenshot is a valid PNG + expect(screenshot).toBeInstanceOf(Buffer); + expect(screenshot.length).toBeGreaterThan(1000); + + // Verify PNG signature + const pngSignature = Buffer.from([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]); + expect(screenshot.slice(0, 8)).toEqual(pngSignature); + }, 90000); + }); + + describe('Playwright Engine', () => { + let browser; + + beforeEach(async () => { + browser = await createBrowser('playwright'); + }); + + afterEach(async () => { + if (browser) { + await browser.close(); + } + }); + + it('can download Wikipedia page as HTML', async () => { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(WIKIPEDIA_URL, { waitUntil: 'networkidle0', timeout: 60000 }); + + // Wait for page to fully load + await new Promise(resolve => setTimeout(resolve, 5000)); + + const html = await page.content(); + + // Verify HTML contains expected Wikipedia content + expect(html).toContain('Wikipedia'); + expect(html).toMatch(/ { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(WIKIPEDIA_URL, { waitUntil: 'networkidle0', timeout: 60000 }); + + // Wait for page to fully load + await new Promise(resolve => setTimeout(resolve, 5000)); + + const html = await page.content(); + const markdown = convertHtmlToMarkdown(html, WIKIPEDIA_URL); + + // Verify Markdown contains expected Wikipedia content + expect(markdown).toContain('Wikipedia'); + expect(markdown.length).toBeGreaterThan(500); + // Should not contain main HTML structure tags + expect(markdown).not.toMatch(/ { + const page = await browser.newPage(); + await page.setViewport({ width: 1280, height: 800 }); + await page.goto(WIKIPEDIA_URL, { waitUntil: 'networkidle0', timeout: 60000 }); + + // Wait for page to fully load + await new Promise(resolve => setTimeout(resolve, 5000)); + + const screenshot = await page.screenshot({ type: 'png' }); + + // Verify screenshot is a valid PNG + expect(screenshot).toBeInstanceOf(Buffer); + expect(screenshot.length).toBeGreaterThan(1000); + + // Verify PNG signature + const pngSignature = Buffer.from([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]); + expect(screenshot.slice(0, 8)).toEqual(pngSignature); + }, 90000); + }); + + describe('Engine Comparison', () => { + it('both engines can download Wikipedia content successfully', async () => { + const puppeteerBrowser = await createBrowser('puppeteer'); + const playwrightBrowser = await createBrowser('playwright'); + + const puppeteerPage = await puppeteerBrowser.newPage(); + const playwrightPage = await playwrightBrowser.newPage(); + + await puppeteerPage.setViewport({ width: 1280, height: 800 }); + await playwrightPage.setViewport({ width: 1280, height: 800 }); + + await puppeteerPage.goto(WIKIPEDIA_URL, { waitUntil: 'networkidle0', timeout: 60000 }); + await playwrightPage.goto(WIKIPEDIA_URL, { waitUntil: 'networkidle0', timeout: 60000 }); + + // Wait for pages to fully load + await new Promise(resolve => setTimeout(resolve, 5000)); + + const puppeteerHtml = await puppeteerPage.content(); + const playwrightHtml = await playwrightPage.content(); + + // Both should contain Wikipedia content + expect(puppeteerHtml).toContain('Wikipedia'); + expect(playwrightHtml).toContain('Wikipedia'); + + // Both should be substantial HTML + expect(puppeteerHtml.length).toBeGreaterThan(1000); + expect(playwrightHtml.length).toBeGreaterThan(1000); + + await puppeteerBrowser.close(); + await playwrightBrowser.close(); + }, 120000); + }); +}); From c31c440a0aad6665d65d9ee4196ad4cf4ba963c8 Mon Sep 17 00:00:00 2001 From: konard Date: Sat, 25 Oct 2025 10:21:45 +0200 Subject: [PATCH 3/3] Revert "Initial commit with task details for issue #8" This reverts commit 3657e47e31a337bd088c5ec571c59cf3ffafc940. --- CLAUDE.md | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index e035495..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,5 +0,0 @@ -Issue to solve: undefined -Your prepared branch: issue-8-2bf69acf -Your prepared working directory: /tmp/gh-issue-solver-1761379609843 - -Proceed. \ No newline at end of file