From fcc5de76ab11b4df0314227893c79ed1d8907d3e Mon Sep 17 00:00:00 2001 From: Venkata Krishnan S <114849299+venkat22022202@users.noreply.github.com> Date: Wed, 1 Apr 2026 18:26:24 +0530 Subject: [PATCH] fix(utils): handle XML-like tags in inline code during metadata extraction When a page's first paragraph contains inline code with XML-like tags (e.g. `<metadata>`), the createExcerpt function incorrectly strips them as HTML tags before processing the backtick syntax, producing broken excerpt text like "Removes the `` element from the document." Move inline code processing before HTML tag removal and escape angle brackets in extracted code content to HTML entities so they survive the subsequent HTML-stripping step. Fixes #11818 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- .../src/__tests__/markdownUtils.test.ts | 24 +++++++++++++++++++ .../docusaurus-utils/src/markdownUtils.ts | 9 +++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/packages/docusaurus-utils/src/__tests__/markdownUtils.test.ts b/packages/docusaurus-utils/src/__tests__/markdownUtils.test.ts index 71be6442e082..cb5c4960223d 100644 --- a/packages/docusaurus-utils/src/__tests__/markdownUtils.test.ts +++ b/packages/docusaurus-utils/src/__tests__/markdownUtils.test.ts @@ -184,6 +184,30 @@ describe('createExcerpt', () => { `), ).toBe('Lorem ipsum dolor sit amet, consectetur adipiscing elit.'); }); + + it('creates excerpt with XML tag inside inline code', () => { + expect( + createExcerpt(dedent` + # Markdown Regular Title + + This paragraph includes a link to the \`<metadata>\` documentation. + `), + ).toBe( + 'This paragraph includes a link to the &lt;metadata&gt; documentation.', + ); + }); + + it('creates excerpt with XML tag inside inline code with hyperlink', () => { + expect( + createExcerpt(dedent` + # Markdown Regular Title + + This paragraph includes a link to the [\`<metadata>\`](https://developer.mozilla.org/en-US/docs/Web/SVG/Element/metadata) documentation. 
+ `), + ).toBe( + 'This paragraph includes a link to the &lt;metadata&gt; documentation.', + ); + }); }); describe('parseMarkdownContentTitle', () => { diff --git a/packages/docusaurus-utils/src/markdownUtils.ts b/packages/docusaurus-utils/src/markdownUtils.ts index cef01cb70c1c..f73f16da285e 100644 --- a/packages/docusaurus-utils/src/markdownUtils.ts +++ b/packages/docusaurus-utils/src/markdownUtils.ts @@ -128,6 +128,13 @@ export function createExcerpt(fileString: string): string | undefined { } const cleanedLine = fileLine + // Remove inline code (must come before HTML tag removal so that + // XML-like tags inside backticks are not stripped as HTML). + // Angle brackets in code content are escaped to HTML entities so they + // survive the HTML-tag removal step that follows. + .replace(/`(?<text>.+?)`/g, (_match, text: string) => + text.replace(/</g, '&lt;').replace(/>/g, '&gt;'), + ) // Remove HTML tags. .replace(/<[^>]*>/g, '') // Remove Title headers @@ -144,8 +151,6 @@ export function createExcerpt(fileString: string): string | undefined { .replace(/\[\^.+?\](?:: .*$)?/g, '') // Remove inline links. .replace(/\[(?<alt>.*?)\][[(].*?[\])]/g, '$1') - // Remove inline code. - .replace(/`(?<text>.+?)`/g, '$1') // Remove blockquotes. .replace(/^\s{0,3}>\s?/g, '') // Remove admonition definition.