From 8f15fee73309186549310db9b8a28aab36556df5 Mon Sep 17 00:00:00 2001 From: Matthew O'Riordan Date: Tue, 30 Sep 2025 23:32:00 +0200 Subject: [PATCH 1/5] feat: generate markdown static files for LLM agent token optimization Generate both HTML and Markdown versions of each documentation page to optimize token usage for LLM crawlers and AI agents. Research shows that serving markdown instead of HTML can reduce token consumption by 60-80%, significantly improving efficiency and reducing costs for AI-powered tools accessing documentation. Reference: https://x.com/cramforce/status/1972430376149913715 ## Implementation - Added post-build hook to convert HTML pages to clean Markdown format - Configured nginx content negotiation to serve markdown when requested - Added validation script to ensure markdown generation completeness - Integrated markdown generation into CI/CD pipeline - Added UI button with markdown icon for user access ## Usage **Via content negotiation (for agents/crawlers):** ```bash curl -H "Accept: text/markdown" https://ably.com/docs/channels ``` **Direct file access:** ```bash curl https://ably.com/docs/channels/index.md ``` **Via UI:** Click the Markdown icon button in the "Open In" section on any page ## Technical Details - Uses Turndown library for HTML to Markdown conversion - Preserves code block language annotations - Removes navigation, headers, footers and UI chrome - Markdown files located at `/docs/{page-path}/index.md` - Skips redirect pages (324 redirects detected) - Successfully generates markdown for 209/210 content pages - No frontmatter - clean markdown content only --- .circleci/config.yml | 14 ++ README.md | 35 ++++ bin/validate-markdown.ts | 113 +++++++++++++ config/mime.types | 1 + config/nginx.conf.erb | 40 ++++- data/onPostBuild/generateMarkdown.ts | 214 +++++++++++++++++++++++++ data/onPostBuild/index.ts | 2 + package.json | 3 +- src/components/Layout/RightSidebar.tsx | 38 +++++ src/images/icons/markdown-mark.svg | 1 + 
static/icons/markdown-mark.svg | 1 + 11 files changed, 460 insertions(+), 2 deletions(-) create mode 100755 bin/validate-markdown.ts create mode 100644 data/onPostBuild/generateMarkdown.ts create mode 100644 src/images/icons/markdown-mark.svg create mode 100644 static/icons/markdown-mark.svg diff --git a/.circleci/config.yml b/.circleci/config.yml index c24acc6c63..df36b0304f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -81,6 +81,17 @@ jobs: name: Validate llms.txt command: yarn validate-llms-txt + validate-markdown: + executor: + name: default + steps: + - checkout + - attach_workspace: + at: . + - run: + name: Validate markdown files + command: yarn validate-markdown + test-nginx: docker: - image: heroku/heroku:24-build @@ -155,3 +166,6 @@ workflows: - validate-llms-txt: requires: - build + - validate-markdown: + requires: + - build diff --git a/README.md b/README.md index 0cb8da6992..49e93b19a6 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,41 @@ To run the docs site locally, run `bin/dev` from the root directory. Alternative View the [contribution guide](CONTRIBUTING.md) for information on how to write content and contribute to Ably docs. +## Markdown Static Files + +The build process generates both HTML and Markdown versions of each documentation page. This provides a more token-efficient format for LLM crawlers and API clients. + +### Content Negotiation + +The site supports content negotiation via the `Accept` header: + +```bash +# Request markdown version +curl -H "Accept: text/markdown" https://ably.com/docs/channels + +# Request HTML version (default) +curl https://ably.com/docs/channels +``` + +Markdown files are located at `/docs/{page-path}/index.md` alongside their HTML counterparts at `/docs/{page-path}/index.html`. + +### Build Process + +1. **Source**: Content is written in Textile or MDX format +2. **HTML Generation**: Gatsby converts source files to static HTML +3. 
**Markdown Generation**: The `generateMarkdown` post-build hook converts HTML to clean Markdown +4. **Compression**: Both HTML and Markdown files are gzip compressed + +### Validation + +Validate markdown generation after building: + +```bash +yarn validate-markdown +``` + +This ensures all HTML pages have corresponding Markdown files and reports any issues. + ## Support If you have any questions or suggestions, please [raise an issue](https://github.com/ably/docs/issues). diff --git a/bin/validate-markdown.ts b/bin/validate-markdown.ts new file mode 100755 index 0000000000..3a12ffefa7 --- /dev/null +++ b/bin/validate-markdown.ts @@ -0,0 +1,113 @@ +#!/usr/bin/env node + +/** + * Validates that markdown files exist for all HTML pages in the public directory. + * This script ensures the markdown generation process completed successfully. + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import fastGlob from 'fast-glob'; + +const publicDir = path.join(process.cwd(), 'public', 'docs'); + +interface ValidationResult { + totalPages: number; + markdownFound: number; + markdownMissing: number; + redirectPages: number; + missingFiles: string[]; +} + +const validateMarkdownFiles = async (): Promise => { + // Find all index.html files in the docs directory + const htmlFiles = await fastGlob('**/index.html', { + cwd: publicDir, + absolute: false, + }); + + const result: ValidationResult = { + totalPages: htmlFiles.length, + markdownFound: 0, + markdownMissing: 0, + redirectPages: 0, + missingFiles: [], + }; + + for (const htmlFile of htmlFiles) { + // Get the directory of the HTML file + const dir = path.dirname(htmlFile); + + // Check if this is a redirect page (skip validation for these) + const htmlPath = path.join(publicDir, htmlFile); + const htmlContent = fs.readFileSync(htmlPath, 'utf8'); + + if (htmlContent.length < 1000 && htmlContent.includes('window.location.href')) { + result.redirectPages++; + continue; // Skip redirect pages + } + + // Check if 
corresponding markdown file exists + const markdownFile = path.join(publicDir, dir, 'index.md'); + + if (fs.existsSync(markdownFile)) { + result.markdownFound++; + + // Verify the markdown file has content + const stats = fs.statSync(markdownFile); + if (stats.size === 0) { + console.warn(`⚠️ Warning: ${dir}/index.md is empty`); + } + } else { + result.markdownMissing++; + result.missingFiles.push(dir); + } + } + + return result; +}; + +const main = async () => { + console.log('🔍 Validating markdown files...\n'); + + if (!fs.existsSync(publicDir)) { + console.error(`❌ Error: Public docs directory not found: ${publicDir}`); + console.error(' Make sure to run this script after the build process.'); + process.exit(1); + } + + try { + const result = await validateMarkdownFiles(); + + console.log(`📊 Validation Results:`); + console.log(` Total HTML pages: ${result.totalPages}`); + console.log(` 🔀 Redirect pages (skipped): ${result.redirectPages}`); + console.log(` 📄 Content pages: ${result.totalPages - result.redirectPages}`); + console.log(` ✅ Markdown files found: ${result.markdownFound}`); + console.log(` ❌ Markdown files missing: ${result.markdownMissing}`); + + if (result.markdownMissing > 0) { + console.log('\n⚠️ Missing markdown files:'); + result.missingFiles.slice(0, 10).forEach((file) => { + console.log(` - ${file}/index.md`); + }); + + if (result.missingFiles.length > 10) { + console.log(` ... and ${result.missingFiles.length - 10} more`); + } + + console.log('\n❌ Validation failed: Some markdown files are missing.'); + console.log(' This may indicate an issue with the markdown generation process.'); + process.exit(1); + } + + // Calculate coverage percentage + const coverage = (result.markdownFound / result.totalPages) * 100; + console.log(`\n✅ Validation passed! 
Markdown coverage: ${coverage.toFixed(1)}%`); + } catch (error) { + console.error('❌ Error during validation:', error); + process.exit(1); + } +}; + +main(); diff --git a/config/mime.types b/config/mime.types index 2961256950..7baa544b0f 100644 --- a/config/mime.types +++ b/config/mime.types @@ -11,6 +11,7 @@ types { text/mathml mml; text/plain txt; + text/markdown md markdown; text/vnd.sun.j2me.app-descriptor jad; text/vnd.wap.wml wml; text/x-component htc; diff --git a/config/nginx.conf.erb b/config/nginx.conf.erb index d14403a35c..03a5b363a5 100644 --- a/config/nginx.conf.erb +++ b/config/nginx.conf.erb @@ -18,7 +18,7 @@ http { gzip on; gzip_comp_level 6; gzip_min_length 512; - gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss font/woff font/woff2 image/svg+xml; + gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss font/woff font/woff2 image/svg+xml text/markdown; gzip_vary on; gzip_proxied any; # Heroku router sends Via header @@ -126,6 +126,15 @@ http { # / PROTECTED CONTENT REQUESTS + ## + # CONTENT NEGOTIATION FOR MARKDOWN + # Check if the client accepts markdown by looking for text/markdown in the Accept header + map $http_accept $prefers_markdown { + default "no"; + "~*text/markdown" "yes"; + } + # / CONTENT NEGOTIATION FOR MARKDOWN + server { listen <%= ENV["PORT"] %>; charset UTF-8; @@ -230,13 +239,42 @@ http { <% if content_request_protected %> # Serve the file if it exists, otherwise try to authenticate # (.html requests won't match here, they'll go to the @html_auth location) + # If client prefers markdown and is authenticated, serve markdown + if ($prefers_markdown = "yes") { + rewrite ^ @markdown_request last; + } try_files $request_uri @html_auth; <% else %> + # If client prefers markdown, serve markdown instead of HTML + if ($prefers_markdown = "yes") { + rewrite ^ @markdown_request last; + } # Serve the file if it 
exists, try index.html for paths without a trailing slash, otherwise 404 try_files $request_uri $request_uri/index.html $request_uri/ =404; <% end %> } + # Serve markdown files with content negotiation + location @markdown_request { + <% if content_request_protected %> + # Check authentication for markdown requests + if ($token_auth_status != "allowed") { + <% if host = ENV['CONTENT_REQUEST_CANONICAL_HOST'] %> + return 301 <%= ENV['SKIP_HTTPS'] == 'true' ? '$scheme' : 'https' %>://<%= host %>$request_uri; + <% else %> + return 404; + <% end %> + } + <% end %> + + # Set proper content type for markdown + more_set_headers 'Content-Type: text/markdown; charset=utf-8'; + more_set_headers 'Vary: Accept'; + + # Try to serve the markdown file, fall back to HTML if not available + try_files $request_uri/index.md $request_uri.md $request_uri/index.html $request_uri/ =404; + } + <% if content_request_protected %> # Authenticate .html requests by checking the token_auth_status variable # which is set in the map block earlier in this file. diff --git a/data/onPostBuild/generateMarkdown.ts b/data/onPostBuild/generateMarkdown.ts new file mode 100644 index 0000000000..36e816fd51 --- /dev/null +++ b/data/onPostBuild/generateMarkdown.ts @@ -0,0 +1,214 @@ +import { GatsbyNode } from 'gatsby'; +import * as path from 'path'; +import * as fs from 'fs'; +import TurndownService from 'turndown'; +import cheerio from 'cheerio'; +import fastGlob from 'fast-glob'; + +/** + * This script generates Markdown static files alongside HTML files for each page. + * This allows LLM crawlers and other clients to request markdown versions of pages + * which are significantly more token-efficient than HTML. 
+ */ + +const REPORTER_PREFIX = 'generateMarkdown:'; + +// Configure Turndown for documentation-friendly markdown +const createTurndownService = () => { + const turndownService = new TurndownService({ + headingStyle: 'atx', + codeBlockStyle: 'fenced', + bulletListMarker: '-', + emDelimiter: '_', + strongDelimiter: '**', + linkStyle: 'inlined', + linkReferenceStyle: 'full', + }); + + // Preserve code block language annotations + turndownService.addRule('fencedCodeBlock', { + filter: (node) => { + return node.nodeName === 'PRE' && node.firstChild && node.firstChild.nodeName === 'CODE'; + }, + replacement: (content, node) => { + const codeNode = node.firstChild as HTMLElement; + const className = codeNode.getAttribute('class') || ''; + const languageMatch = className.match(/language-(\w+)/); + const language = languageMatch ? languageMatch[1] : ''; + + // Get the actual code content + const code = codeNode.textContent || ''; + + return '\n\n```' + language + '\n' + code + '\n```\n\n'; + }, + }); + + // Remove navigation, headers, footers, and other UI elements + turndownService.remove(['nav', 'header', 'footer', 'script', 'style', 'noscript']); + + return turndownService; +}; + +// Extract main article content from HTML file +const extractMainContent = (htmlPath: string): string | null => { + try { + if (!fs.existsSync(htmlPath)) { + return null; + } + + const html = fs.readFileSync(htmlPath, 'utf8'); + + // Check if this is a redirect page (very small file with window.location.href) + if (html.length < 1000 && html.includes('window.location.href')) { + return null; // Skip redirect pages + } + + const $ = cheerio.load(html); + + // Remove unwanted elements + $('nav, header, footer, script, style, noscript, .sidebar, .navigation').remove(); + + // Try to find the main article content + // Look for common article containers + let mainContent = $('article').html() || $('main').html() || $('#main-content').html(); + + // If we can't find a main content area, fall back to 
body but remove header/footer + if (!mainContent) { + $('body > header, body > footer, body > nav').remove(); + mainContent = $('body').html(); + } + + // Check if content is meaningful (more than just whitespace/empty tags) + if (mainContent && mainContent.trim().length < 100) { + return null; // Skip pages with minimal content + } + + return mainContent || null; + } catch (error) { + console.error(`Error extracting content from ${htmlPath}:`, error); + return null; + } +}; + +// Create markdown frontmatter (disabled - returns empty string) +const createFrontmatter = (title: string, description: string): string => { + return ''; +}; + +// Convert HTML content to Markdown +const convertToMarkdown = (htmlContent: string, title: string, description: string): string => { + const turndownService = createTurndownService(); + + // Add frontmatter + const frontmatter = createFrontmatter(title, description); + + // Convert HTML to Markdown + const markdown = turndownService.turndown(htmlContent); + + // Clean up excessive newlines + const cleanedMarkdown = markdown.replace(/\n{3,}/g, '\n\n'); + + return frontmatter + cleanedMarkdown; +}; + +// Write markdown file +const writeMarkdownFile = (outputPath: string, content: string, reporter: any) => { + try { + // Ensure directory exists + const dir = path.dirname(outputPath); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + + fs.writeFileSync(outputPath, content, 'utf8'); + return true; + } catch (error) { + reporter.error(`${REPORTER_PREFIX} Error writing markdown file ${outputPath}:`, error as Error); + return false; + } +}; + +export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter, basePath }) => { + const publicDir = path.join(process.cwd(), 'public'); + const docsDir = path.join(publicDir, 'docs'); + + // Find all index.html files in the public/docs directory + const htmlFiles = await fastGlob('**/index.html', { + cwd: docsDir, + absolute: false, + }); + + 
reporter.info(`${REPORTER_PREFIX} Found ${htmlFiles.length} HTML files to process`); + + // Process all HTML files and extract metadata + const allPages = htmlFiles + .map((htmlFile) => { + const htmlPath = path.join(docsDir, htmlFile); + const html = fs.readFileSync(htmlPath, 'utf8'); + + // Skip redirect pages + if (html.length < 1000 && html.includes('window.location.href')) { + return null; + } + + // Extract slug from file path (remove /index.html) + const slug = htmlFile.replace(/\/index\.html$/, '').replace(/^\.\//, ''); + + // Extract title and description from HTML meta tags + const $ = cheerio.load(html); + const title = + $('meta[property="og:title"]').attr('content') || + $('meta[name="twitter:title"]').attr('content') || + $('title').text() || + 'Untitled'; + const description = + $('meta[name="description"]').attr('content') || $('meta[property="og:description"]').attr('content') || ''; + + return { + slug: slug || '.', + title, + description, + }; + }) + .filter((page) => page !== null) as { slug: string; title: string; description: string }[]; + + reporter.info(`${REPORTER_PREFIX} Processing ${allPages.length} content pages`); + + let successCount = 0; + let failCount = 0; + + for (const page of allPages) { + const { slug, title, description } = page; + + // Determine the HTML file path + const htmlPath = path.join(publicDir, 'docs', slug, 'index.html'); + + // Extract main content from HTML + const htmlContent = extractMainContent(htmlPath); + + if (!htmlContent) { + reporter.warn(`${REPORTER_PREFIX} Could not extract content for ${slug}`); + failCount++; + continue; + } + + // Convert to markdown + const markdown = convertToMarkdown(htmlContent, title, description); + + // Write markdown file + const markdownPath = path.join(publicDir, 'docs', slug, 'index.md'); + const success = writeMarkdownFile(markdownPath, markdown, reporter); + + if (success) { + successCount++; + } else { + failCount++; + } + } + + if (failCount > 0) { + 
reporter.warn(`${REPORTER_PREFIX} Generated ${successCount} markdown files with ${failCount} failures`); + } else { + reporter.info(`${REPORTER_PREFIX} Successfully generated ${successCount} markdown files`); + } +}; diff --git a/data/onPostBuild/index.ts b/data/onPostBuild/index.ts index 844392b4d6..a56de02f6d 100644 --- a/data/onPostBuild/index.ts +++ b/data/onPostBuild/index.ts @@ -1,9 +1,11 @@ import { GatsbyNode } from 'gatsby'; import { onPostBuild as llmstxt } from './llmstxt'; +import { onPostBuild as generateMarkdown } from './generateMarkdown'; import { onPostBuild as compressAssets } from './compressAssets'; export const onPostBuild: GatsbyNode['onPostBuild'] = async (args) => { // Run all onPostBuild functions in sequence await llmstxt(args); + await generateMarkdown(args); await compressAssets(args); }; diff --git a/package.json b/package.json index 2f26235923..71575fce45 100644 --- a/package.json +++ b/package.json @@ -38,7 +38,8 @@ "lint-staged": "lint-staged", "repo-githooks": "git config core.hooksPath .githooks", "no-githooks": "git config --unset core.hooksPath", - "validate-llms-txt": "node bin/validate-llms.txt.ts" + "validate-llms-txt": "node bin/validate-llms.txt.ts", + "validate-markdown": "node bin/validate-markdown.ts" }, "dependencies": { "@ably/ui": "17.7.3", diff --git a/src/components/Layout/RightSidebar.tsx b/src/components/Layout/RightSidebar.tsx index 824f580fc8..6e9bd73142 100644 --- a/src/components/Layout/RightSidebar.tsx +++ b/src/components/Layout/RightSidebar.tsx @@ -321,6 +321,44 @@ const RightSidebar = () => { ))} + { + // Check if markdown file exists by attempting to fetch it + const markdownUrl = `${location.pathname.replace(/\/$/, '')}/index.md`; + fetch(markdownUrl, { method: 'HEAD' }) + .then((response) => { + if (!response.ok) { + e.preventDefault(); + alert( + 'Markdown files are only available in production builds. 
Run "yarn build" to generate them.', + ); + } + }) + .catch(() => { + e.preventDefault(); + alert('Markdown files are only available in production builds. Run "yarn build" to generate them.'); + }); + + track('markdown_link_clicked', { + location: location.pathname, + }); + }} + > + + } + > + View in Markdown + + diff --git a/src/images/icons/markdown-mark.svg b/src/images/icons/markdown-mark.svg new file mode 100644 index 0000000000..171ea25482 --- /dev/null +++ b/src/images/icons/markdown-mark.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/static/icons/markdown-mark.svg b/static/icons/markdown-mark.svg new file mode 100644 index 0000000000..171ea25482 --- /dev/null +++ b/static/icons/markdown-mark.svg @@ -0,0 +1 @@ + \ No newline at end of file From 2fbeaff0384e66363ef8663111a2d3b4e350c66b Mon Sep 17 00:00:00 2001 From: Matthew O'Riordan Date: Tue, 30 Sep 2025 23:39:44 +0200 Subject: [PATCH 2/5] refactor: address Copilot PR feedback - Remove fetch overhead in RightSidebar by checking NODE_ENV instead - Simplify convertToMarkdown by removing unused createFrontmatter function - Extract magic numbers to named constants (REDIRECT_PAGE_MAX_SIZE, MIN_CONTENT_LENGTH) - Apply constants consistently across generateMarkdown.ts and validate-markdown.ts --- bin/validate-markdown.ts | 5 ++++- data/onPostBuild/generateMarkdown.ts | 22 +++++++++------------- src/components/Layout/RightSidebar.tsx | 20 +++++--------------- 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/bin/validate-markdown.ts b/bin/validate-markdown.ts index 3a12ffefa7..7b1836b401 100755 --- a/bin/validate-markdown.ts +++ b/bin/validate-markdown.ts @@ -11,6 +11,9 @@ import fastGlob from 'fast-glob'; const publicDir = path.join(process.cwd(), 'public', 'docs'); +// Constants for content validation (must match generateMarkdown.ts) +const REDIRECT_PAGE_MAX_SIZE = 1000; // Maximum size in bytes for redirect pages + interface ValidationResult { totalPages: number; markdownFound: 
number; @@ -42,7 +45,7 @@ const validateMarkdownFiles = async (): Promise => { const htmlPath = path.join(publicDir, htmlFile); const htmlContent = fs.readFileSync(htmlPath, 'utf8'); - if (htmlContent.length < 1000 && htmlContent.includes('window.location.href')) { + if (htmlContent.length < REDIRECT_PAGE_MAX_SIZE && htmlContent.includes('window.location.href')) { result.redirectPages++; continue; // Skip redirect pages } diff --git a/data/onPostBuild/generateMarkdown.ts b/data/onPostBuild/generateMarkdown.ts index 36e816fd51..1c3ec852b2 100644 --- a/data/onPostBuild/generateMarkdown.ts +++ b/data/onPostBuild/generateMarkdown.ts @@ -13,6 +13,10 @@ import fastGlob from 'fast-glob'; const REPORTER_PREFIX = 'generateMarkdown:'; +// Constants for content validation +const REDIRECT_PAGE_MAX_SIZE = 1000; // Maximum size in bytes for redirect pages +const MIN_CONTENT_LENGTH = 100; // Minimum content length to consider meaningful + // Configure Turndown for documentation-friendly markdown const createTurndownService = () => { const turndownService = new TurndownService({ @@ -59,7 +63,7 @@ const extractMainContent = (htmlPath: string): string | null => { const html = fs.readFileSync(htmlPath, 'utf8'); // Check if this is a redirect page (very small file with window.location.href) - if (html.length < 1000 && html.includes('window.location.href')) { + if (html.length < REDIRECT_PAGE_MAX_SIZE && html.includes('window.location.href')) { return null; // Skip redirect pages } @@ -79,7 +83,7 @@ const extractMainContent = (htmlPath: string): string | null => { } // Check if content is meaningful (more than just whitespace/empty tags) - if (mainContent && mainContent.trim().length < 100) { + if (mainContent && mainContent.trim().length < MIN_CONTENT_LENGTH) { return null; // Skip pages with minimal content } @@ -90,25 +94,17 @@ const extractMainContent = (htmlPath: string): string | null => { } }; -// Create markdown frontmatter (disabled - returns empty string) -const 
createFrontmatter = (title: string, description: string): string => { - return ''; -}; - // Convert HTML content to Markdown -const convertToMarkdown = (htmlContent: string, title: string, description: string): string => { +const convertToMarkdown = (htmlContent: string): string => { const turndownService = createTurndownService(); - // Add frontmatter - const frontmatter = createFrontmatter(title, description); - // Convert HTML to Markdown const markdown = turndownService.turndown(htmlContent); // Clean up excessive newlines const cleanedMarkdown = markdown.replace(/\n{3,}/g, '\n\n'); - return frontmatter + cleanedMarkdown; + return cleanedMarkdown; }; // Write markdown file @@ -193,7 +189,7 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter } // Convert to markdown - const markdown = convertToMarkdown(htmlContent, title, description); + const markdown = convertToMarkdown(htmlContent); // Write markdown file const markdownPath = path.join(publicDir, 'docs', slug, 'index.md'); diff --git a/src/components/Layout/RightSidebar.tsx b/src/components/Layout/RightSidebar.tsx index 6e9bd73142..144c5bdf15 100644 --- a/src/components/Layout/RightSidebar.tsx +++ b/src/components/Layout/RightSidebar.tsx @@ -325,21 +325,11 @@ const RightSidebar = () => { href={`${location.pathname.replace(/\/$/, '')}/index.md`} className="flex h-5 ui-theme-dark group/markdown-link cursor-pointer" onClick={(e) => { - // Check if markdown file exists by attempting to fetch it - const markdownUrl = `${location.pathname.replace(/\/$/, '')}/index.md`; - fetch(markdownUrl, { method: 'HEAD' }) - .then((response) => { - if (!response.ok) { - e.preventDefault(); - alert( - 'Markdown files are only available in production builds. Run "yarn build" to generate them.', - ); - } - }) - .catch(() => { - e.preventDefault(); - alert('Markdown files are only available in production builds. 
Run "yarn build" to generate them.'); - }); + // In development mode, markdown files aren't generated, so show alert immediately + if (process.env.NODE_ENV === 'development') { + e.preventDefault(); + alert('Markdown files are only available in production builds. Run "yarn build" to generate them.'); + } track('markdown_link_clicked', { location: location.pathname, From 979a8d37c748f55be64efd0a84edd361c279c162 Mon Sep 17 00:00:00 2001 From: Matthew O'Riordan Date: Thu, 2 Oct 2025 00:15:41 +0200 Subject: [PATCH 3/5] fix: resolve markdown generation issues - Use REDIRECT_PAGE_MAX_SIZE constant instead of hardcoded value - Fix slug extraction regex to properly handle root index.html file - Ensures all 210 content pages generate markdown files successfully --- data/onPostBuild/generateMarkdown.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/onPostBuild/generateMarkdown.ts b/data/onPostBuild/generateMarkdown.ts index 1c3ec852b2..21001d81e2 100644 --- a/data/onPostBuild/generateMarkdown.ts +++ b/data/onPostBuild/generateMarkdown.ts @@ -143,12 +143,12 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter const html = fs.readFileSync(htmlPath, 'utf8'); // Skip redirect pages - if (html.length < 1000 && html.includes('window.location.href')) { + if (html.length < REDIRECT_PAGE_MAX_SIZE && html.includes('window.location.href')) { return null; } - // Extract slug from file path (remove /index.html) - const slug = htmlFile.replace(/\/index\.html$/, '').replace(/^\.\//, ''); + // Extract slug from file path (remove index.html) + const slug = htmlFile.replace(/\/?index\.html$/, '').replace(/^\.\//, ''); // Extract title and description from HTML meta tags const $ = cheerio.load(html); From 8f10806b65a0e9b507686cd9225e112ea2cd7b6a Mon Sep 17 00:00:00 2001 From: Matthew O'Riordan Date: Thu, 2 Oct 2025 13:08:50 +0200 Subject: [PATCH 4/5] refactor: improve code quality in markdown generation - Use regex for more robust 
redirect page detection - Remove unused title/description extraction (frontmatter disabled) - Remove non-existent .sidebar and .navigation CSS selectors - Distinguish between skipped pages and failures in logging - Remove redundant code comments --- data/onPostBuild/generateMarkdown.ts | 35 ++++++++++------------------ 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/data/onPostBuild/generateMarkdown.ts b/data/onPostBuild/generateMarkdown.ts index 21001d81e2..fdc1dddcc1 100644 --- a/data/onPostBuild/generateMarkdown.ts +++ b/data/onPostBuild/generateMarkdown.ts @@ -47,7 +47,6 @@ const createTurndownService = () => { }, }); - // Remove navigation, headers, footers, and other UI elements turndownService.remove(['nav', 'header', 'footer', 'script', 'style', 'noscript']); return turndownService; @@ -62,15 +61,14 @@ const extractMainContent = (htmlPath: string): string | null => { const html = fs.readFileSync(htmlPath, 'utf8'); - // Check if this is a redirect page (very small file with window.location.href) - if (html.length < REDIRECT_PAGE_MAX_SIZE && html.includes('window.location.href')) { + // Check if this is a redirect page + if (html.length < REDIRECT_PAGE_MAX_SIZE && /