From 6ced19b628b953a80791d3d610330865e4134342 Mon Sep 17 00:00:00 2001 From: Ned Twigg Date: Thu, 18 Sep 2025 13:10:32 -0700 Subject: [PATCH 1/6] fix a CSP issue in corpus-view --- tests/corpus-view.ts | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tests/corpus-view.ts b/tests/corpus-view.ts index 18eafa1..facbfa8 100644 --- a/tests/corpus-view.ts +++ b/tests/corpus-view.ts @@ -39,7 +39,8 @@ const WEBEXTENSION_POLYFILL_REPLACEMENT = 'console.warn("Webextension-polyfill check bypassed for corpus testing")' const BROWSER_API_MOCKS = 'window.chrome=window.chrome||{runtime:{getURL:path=>"chrome-extension://gitcasso-test/"+path,onMessage:{addListener:()=>{}},sendMessage:()=>Promise.resolve(),id:"gitcasso-test"}};window.browser=window.chrome;' -const PERMISSIVE_CSP = "default-src 'self' 'unsafe-inline' 'unsafe-eval' data: blob: http: https:;" +const PERMISSIVE_CSP = + "default-src 'self' 'unsafe-inline' 'unsafe-eval' data: blob: http: https:; connect-src 'self' http: https:; script-src 'self' 'unsafe-inline' 'unsafe-eval';" // UI Styles const REBUILD_BUTTON_STYLES = ` @@ -237,6 +238,10 @@ app.get('/corpus/:key/:mode(clean|gitcasso)', async (req, res) => { // Replace external URLs with local asset URLs let html = mainEntry.response.content.text! 
+ + // Strip CSP headers that might block our injected scripts + html = stripCSPFromHTML(html) + domains.forEach((domain) => { const escapedDomain = domain.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') const regex = new RegExp(`https?://${escapedDomain}`, 'g') @@ -245,6 +250,13 @@ app.get('/corpus/:key/:mode(clean|gitcasso)', async (req, res) => { if (mode === 'gitcasso') { html = injectGitcassoScriptForHAR(key, html) } + + // Set permissive headers for HAR corpus to allow rebuild requests + res.set({ + 'Content-Security-Policy': PERMISSIVE_CSP, + 'X-Content-Type-Options': 'nosniff', + }) + return res.send(html) } else if (entry.type === 'html') { // Handle HTML corpus @@ -385,12 +397,15 @@ app.listen(PORT, () => { // Strip CSP meta tags and headers from HTML that might block our scripts function stripCSPFromHTML(html: string): string { - // Remove CSP meta tags - html = html.replace(/<meta[^>]*http-equiv\s*=\s*["']content-security-policy["'][^>]*>/gi, '') - html = html.replace(/<meta[^>]*name\s*=\s*["']content-security-policy["'][^>]*>/gi, '') + // Remove CSP meta tags - more comprehensive patterns + html = html.replace(/<meta[^>]*http-equiv\s*=\s*["']?content-security-policy["']?[^>]*>/gi, '') + html = html.replace(/<meta[^>]*name\s*=\s*["']?content-security-policy["']?[^>]*>/gi, '') + + // Also match patterns where content-security-policy appears anywhere in the meta tag + html = html.replace(/<meta[^>]*content-security-policy[^>]*>/gi, '') // Remove any other restrictive security meta tags - html = html.replace(/<meta[^>]*http-equiv\s*=\s*["']x-content-type-options["'][^>]*>/gi, '') + html = html.replace(/<meta[^>]*http-equiv\s*=\s*["']?x-content-type-options["']?[^>]*>/gi, '') return html } From 6432f77c278bdef47938ec07434f80441637d1c9 Mon Sep 17 00:00:00 2001 From: Ned Twigg Date: Thu, 18 Sep 2025 13:12:26 -0700 Subject: [PATCH 2/6] Rename `corpus:view` to just `corpus`.
--- .claude/agents/corpus-fixer.md | 6 +++--- .claude/commands/corpus-loop.md | 2 +- CONTRIBUTING.md | 4 ++-- package.json | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.claude/agents/corpus-fixer.md b/.claude/agents/corpus-fixer.md index 6125f20..363a619 100644 --- a/.claude/agents/corpus-fixer.md +++ b/.claude/agents/corpus-fixer.md @@ -1,16 +1,16 @@ --- name: corpus-fixer -description: Use this agent when you need to fix or improve the detection logic for a specific Gitcasso corpus by testing changes in the corpus:view development environment. Examples: Context: User has identified issues with comment spot detection in a specific corpus and wants to test fixes. user: 'The comment detection is missing some spots in corpus ABC123, can you help fix the enhancer logic?' assistant: 'I'll use the corpus-fixer agent to investigate and fix the detection issues in that corpus.' Since the user wants to fix detection logic for a specific corpus, use the corpus-fixer agent to run the corpus:view environment and test changes. Context: User wants to validate that recent changes to an enhancer are working correctly. user: 'I made some changes to the GitHub enhancer, can you test it against corpus XYZ789?' assistant: 'Let me use the corpus-fixer agent to test your enhancer changes against that specific corpus.' The user wants to test enhancer changes against a specific corpus, so use the corpus-fixer agent to validate the changes in the corpus:view environment. +description: Use this agent when you need to fix or improve the detection logic for a specific Gitcasso corpus by testing changes in the corpus development environment. Examples: Context: User has identified issues with comment spot detection in a specific corpus and wants to test fixes. user: 'The comment detection is missing some spots in corpus ABC123, can you help fix the enhancer logic?' assistant: 'I'll use the corpus-fixer agent to investigate and fix the detection issues in that corpus.' 
Since the user wants to fix detection logic for a specific corpus, use the corpus-fixer agent to run the corpus environment and test changes. Context: User wants to validate that recent changes to an enhancer are working correctly. user: 'I made some changes to the GitHub enhancer, can you test it against corpus XYZ789?' assistant: 'Let me use the corpus-fixer agent to test your enhancer changes against that specific corpus.' The user wants to test enhancer changes against a specific corpus, so use the corpus-fixer agent to validate the changes in the corpus environment. model: inherit --- -You are an expert Gitcasso corpus debugging specialist with deep knowledge of browser extension development. You operate within the root project directory and specialize in using the corpus:view development environment to diagnose and fix detection logic issues. +You are an expert Gitcasso corpus debugging specialist with deep knowledge of browser extension development. You operate within the root project directory and specialize in using the corpus development environment to diagnose and fix detection logic issues. Your primary workflow: 1. **Environment Setup**: Always start by reading the documentation at the top of the `corpus-view.ts` file to understand the dev environment. -2. **Launch Development Environment**: Execute `pnpm corpus:view` to bring up the corpus:view development environment. Ensure the environment starts successfully before proceeding. +2. **Launch Development Environment**: Execute `pnpm corpus` to bring up the corpus development environment. Ensure the environment starts successfully before proceeding. 3. **Browser Navigation**: Use the Playwright MCP to interact with the development environment. Navigate to the specific Gitcasso corpus that needs investigation or fixing. 
diff --git a/.claude/commands/corpus-loop.md b/.claude/commands/corpus-loop.md index fe602c1..6102fa0 100644 --- a/.claude/commands/corpus-loop.md +++ b/.claude/commands/corpus-loop.md @@ -1,6 +1,6 @@ --- argument-hint: [corpus_slug] -description: uses Playwright MCP and the `corpus:view` to parse page elements +description: uses Playwright MCP and the `corpus` to parse page elements --- - using Playwright MCP, navigate to `http://localhost:3001/corpus/$1/gitcasso` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7541f49..5c6357a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -79,7 +79,7 @@ When the `textarea` gets removed from the page, the `TextareaRegistry` is notifi ## Testing - `pnpm playground` gives you a test environment where you can tinker with the popup with various test data, supports hot reload -- `pnpm corpus:view` gives you recordings of various web pages which you can see with and without enhancement by the browser extension +- `pnpm corpus` gives you recordings of various web pages which you can see with and without enhancement by the browser extension ### Test Corpus @@ -114,7 +114,7 @@ We maintain a corpus of test pages in two formats for testing the browser extens #### Viewing Corpus Files -- Run `pnpm corpus:view` to start the test server at http://localhost:3001 +- Run `pnpm corpus` to start the test server at http://localhost:3001 - Select any corpus file to view in two modes: - **Clean**: Original page without extension - **Gitcasso**: Page with extension injected for testing diff --git a/package.json b/package.json index f83d4b4..2623443 100644 --- a/package.json +++ b/package.json @@ -64,7 +64,7 @@ "playground": "vite --config vite.playground.config.ts", "playground:build": "vite build --config vite.playground.config.ts", "corpus:har:record": "tsx tests/corpus-har-record.ts", - "corpus:view": "tsx tests/corpus-view.ts" + "corpus": "tsx tests/corpus-view.ts" }, "type": "module", "version": "0.0.1" From 
68ab311edadc29c1490b02bf2d6a12be388e8cc4 Mon Sep 17 00:00:00 2001 From: Ned Twigg Date: Thu, 18 Sep 2025 13:32:07 -0700 Subject: [PATCH 3/6] Add a troubleshooting section to the corpus-loop. --- .claude/commands/corpus-loop.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.claude/commands/corpus-loop.md b/.claude/commands/corpus-loop.md index 6102fa0..e2fdf9b 100644 --- a/.claude/commands/corpus-loop.md +++ b/.claude/commands/corpus-loop.md @@ -54,4 +54,13 @@ If you see `"title": "TODO_TITLE"` or similar hardcoded `TODO` values in the JSO - Don't hedge your bets and write lots of fallback code or strings of `?.`. Have a specific piece of data you want to get, use non-null `!` assertions where necessary to be clear about getting. - If a field is empty, represent it with an empty string. Don't use placeholders when extracting data. -- The pages we are scraping are going to change over time, and it's easier to fix broken ones if we know exactly what used to work. If the code has lots of branching paths, it's harder to tell what it was doing. \ No newline at end of file +- The pages we are scraping are going to change over time, and it's easier to fix broken ones if we know exactly what used to work. If the code has lots of branching paths, it's harder to tell what it was doing. + +## Troubleshooting + +- If you see `"spot": "NO_SPOT"` but expect an enhancer to match: + - Check console logs for enhancer attempts (e.g., `"eE examing url"`) + - Look for specific rejection reasons in the enhancer's `tryToEnhance` method +- If multiple enhancers are conflicting: + - Check the order of enhancer registration in `registries.ts` + - Ensure proper exclusion logic (e.g., checking for specific containers) From 1d68c1c5f84de5de4bc5a0a8087d0321511bd98e Mon Sep 17 00:00:00 2001 From: Ned Twigg Date: Thu, 18 Sep 2025 13:37:14 -0700 Subject: [PATCH 4/6] Remove the `corpus-fixer` agent, it was much worse than the corpus-enhancer. 
--- .claude/agents/corpus-fixer.md | 39 ---------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 .claude/agents/corpus-fixer.md diff --git a/.claude/agents/corpus-fixer.md b/.claude/agents/corpus-fixer.md deleted file mode 100644 index 363a619..0000000 --- a/.claude/agents/corpus-fixer.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -name: corpus-fixer -description: Use this agent when you need to fix or improve the detection logic for a specific Gitcasso corpus by testing changes in the corpus development environment. Examples: Context: User has identified issues with comment spot detection in a specific corpus and wants to test fixes. user: 'The comment detection is missing some spots in corpus ABC123, can you help fix the enhancer logic?' assistant: 'I'll use the corpus-fixer agent to investigate and fix the detection issues in that corpus.' Since the user wants to fix detection logic for a specific corpus, use the corpus-fixer agent to run the corpus environment and test changes. Context: User wants to validate that recent changes to an enhancer are working correctly. user: 'I made some changes to the GitHub enhancer, can you test it against corpus XYZ789?' assistant: 'Let me use the corpus-fixer agent to test your enhancer changes against that specific corpus.' The user wants to test enhancer changes against a specific corpus, so use the corpus-fixer agent to validate the changes in the corpus environment. -model: inherit ---- - -You are an expert Gitcasso corpus debugging specialist with deep knowledge of browser extension development. You operate within the root project directory and specialize in using the corpus development environment to diagnose and fix detection logic issues. - -Your primary workflow: - -1. **Environment Setup**: Always start by reading the documentation at the top of the `corpus-view.ts` file to understand the dev environment. - -2. 
**Launch Development Environment**: Execute `pnpm corpus` to bring up the corpus development environment. Ensure the environment starts successfully before proceeding. - -3. **Browser Navigation**: Use the Playwright MCP to interact with the development environment. Navigate to the specific Gitcasso corpus that needs investigation or fixing. - -4. **Code Synchronization**: Always click the button with id `gitcasso-rebuild-btn` to ensure you're testing against the latest code changes. Wait for the rebuild to complete before analyzing results. - -5. **Detection Analysis**: Examine the detected spots in the `gitcasso-comment-spots` element. Analyze what spots are being detected, what might be missing, and identify patterns in the detection logic that need improvement. - -6. **Enhancer Modification**: Based on your analysis, make targeted changes to the specific enhancer's detection logic. Focus on: - - Improving selector accuracy - - Handling edge cases in the DOM structure - - Optimizing detection algorithms for the specific site pattern - - Ensuring compatibility with dynamic content loading - -7. **Iterative Testing**: After making changes, rebuild and test again to validate improvements. Continue this cycle until the detection logic works correctly for the target corpus. - -8. **Documentation**: Clearly explain what issues you found, what changes you made, and why those changes improve the detection logic. 
- -Key principles: -- Always work incrementally - make small, targeted changes and test frequently -- Focus on the specific corpus mentioned by the user unless told otherwise -- Pay attention to browser console errors and network issues that might affect detection -- Consider how your changes might impact other sites or corpus entries -- Be methodical in your debugging approach - document what you try and what results you observe -- Understand that corpus can be either HAR files (for initial page loads) or HTML snapshots (for post-interaction states) - -You have expertise in CSS selectors, DOM manipulation, JavaScript debugging, and understanding how different websites structure their comment systems. Use this knowledge to create robust, reliable detection logic that works across various edge cases. From cf260e75c341411e83c9f9bd4eb908fd424411f2 Mon Sep 17 00:00:00 2001 From: Ned Twigg Date: Thu, 18 Sep 2025 13:54:21 -0700 Subject: [PATCH 5/6] more compact layout for `pnpm corpus` --- tests/corpus-view.ts | 56 ++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/tests/corpus-view.ts b/tests/corpus-view.ts index facbfa8..db0acce 100644 --- a/tests/corpus-view.ts +++ b/tests/corpus-view.ts @@ -125,20 +125,19 @@ app.get('/', async (_req, res) => { try { const links = Object.entries(CORPUS) .map(([key, entry]) => { - const description = entry.description - ? `
${entry.description}
` - : '' + const description = entry.description ? `
${entry.description}
` : '' return `
  • -
    -
    ${key}
    -
    ${entry.type.toUpperCase()}
    - ${description} -
    -
    - 🔍 Clean - 🚀 Gitcasso +
    +
    + ${key} ${entry.type.toLowerCase()} +
    +
    + ${description}
  • ` }) @@ -151,36 +150,27 @@ app.get('/', async (_req, res) => { Corpus Viewer -

    📄 Corpus Viewer

    -

    Select a recorded page to view:

    +

    Corpus Viewer

      ${links}
    -
    -

    Corpus Types

    -

    HAR: Automated network captures of initial page loads

    -

    HTML: Manual SingleFile captures of post-interaction states

    +
    + HAR: Network captures | HTML: Manual captures
    From f070cdb8485eca729a7a984217fa1cec5a3a4624 Mon Sep 17 00:00:00 2001 From: Ned Twigg Date: Thu, 18 Sep 2025 13:54:42 -0700 Subject: [PATCH 6/6] biome:fix --- tests/corpus-view.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/corpus-view.ts b/tests/corpus-view.ts index db0acce..a137932 100644 --- a/tests/corpus-view.ts +++ b/tests/corpus-view.ts @@ -125,7 +125,9 @@ app.get('/', async (_req, res) => { try { const links = Object.entries(CORPUS) .map(([key, entry]) => { - const description = entry.description ? `
    ${entry.description}
    ` : '' + const description = entry.description + ? `
    ${entry.description}
    ` + : '' return `