Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions data/onPostBuild/transpileMdxToMarkdown.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
removeAnchorTags,
removeJsxComments,
convertImagePathsToGitHub,
convertDocsLinksToMarkdown,
convertRelativeUrls,
replaceTemplateVariables,
calculateOutputPath,
Expand Down Expand Up @@ -332,6 +333,110 @@ import Baz from 'qux';
});
});

/**
 * Tests for convertDocsLinksToMarkdown.
 *
 * Each case passes a markdown snippet through the converter and checks the
 * resulting link target. Covered areas:
 *  - adding the `.md` extension and dropping query strings,
 *  - keeping hash anchors,
 *  - leaving non-docs, non-allowlisted, relative and unparsable links alone,
 *  - trailing-slash normalization,
 *  - the hostname allowlist (apex and `www.` variants).
 */
describe('convertDocsLinksToMarkdown', () => {
  it('should add .md extension to /docs/ links', () => {
    const input = '[Link text](https://ably.com/docs/channels)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/docs/channels.md)');
  });

  it('should remove ?lang= query parameters from /docs/ links', () => {
    const input = '[Link text](https://ably.com/docs/api/realtime-sdk/channels?lang=javascript)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/docs/api/realtime-sdk/channels.md)');
  });

  it('should remove any query parameters from /docs/ links', () => {
    const input = '[Link text](https://ably.com/docs/channels?lang=python&version=2)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/docs/channels.md)');
  });

  // Hash anchors are preserved as they provide semantic context for LLMs
  it('should preserve hash anchors and add .md before them', () => {
    const input = '[Link text](https://ably.com/docs/channels#section)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/docs/channels.md#section)');
  });

  it('should handle both query params and hash anchors', () => {
    const input = '[Link text](https://ably.com/docs/channels?lang=javascript#section)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/docs/channels.md#section)');
  });

  it('should not modify links that already have .md extension', () => {
    const input = '[Link text](https://ably.com/docs/channels.md)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/docs/channels.md)');
  });

  it('should not modify non-/docs/ links', () => {
    const input = '[Link text](https://ably.com/blog/article?lang=en)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/blog/article?lang=en)');
  });

  it('should handle multiple links in content', () => {
    const input = `Check [channels](https://ably.com/docs/channels?lang=js) and [presence](https://ably.com/docs/presence?lang=python#enter)`;
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toContain('[channels](https://ably.com/docs/channels.md)');
    expect(output).toContain('[presence](https://ably.com/docs/presence.md#enter)');
  });

  it('should not modify external non-ably /docs/ links', () => {
    const input = '[External](https://example.com/docs/page?lang=en)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[External](https://example.com/docs/page?lang=en)');
  });

  // This URL is on a host outside the allowlist, so it is left alone regardless
  // of its extension; the next test covers the extension check on an Ably host.
  it('should not add .md to URLs that already have a file extension (.png)', () => {
    const input = '[Image](https://raw.githubusercontent.com/ably/docs/main/src/images/content/diagrams/test.png)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Image](https://raw.githubusercontent.com/ably/docs/main/src/images/content/diagrams/test.png)');
  });

  // Same expectation on an allowlisted host with a /docs/ path, so the result
  // depends on the file-extension check rather than on the hostname check.
  it('should not add .md to ably.com /docs/ URLs that already have a file extension', () => {
    const input = '[Image](https://ably.com/docs/images/diagrams/test.png)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Image](https://ably.com/docs/images/diagrams/test.png)');
  });

  // Tests for trailing slash normalization
  it('should normalize trailing slashes before adding .md', () => {
    const input = '[Link text](https://ably.com/docs/channels/)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/docs/channels.md)');
  });

  it('should normalize trailing slashes with hash anchors', () => {
    const input = '[Link text](https://ably.com/docs/channels/#section)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/docs/channels.md#section)');
  });

  it('should normalize trailing slashes with query params', () => {
    const input = '[Link text](https://ably.com/docs/channels/?lang=javascript)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://ably.com/docs/channels.md)');
  });

  // Tests for the hostname allowlist - apex and www variants of ably.com and
  // ably-dev.com ARE processed, other subdomains are not
  it('should process www.ably.com links (www subdomain is in allowlist)', () => {
    const input = '[Link text](https://www.ably.com/docs/channels)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](https://www.ably.com/docs/channels.md)');
  });

  it('should process ably-dev.com and www.ably-dev.com links', () => {
    const input =
      '[Apex](https://ably-dev.com/docs/channels?lang=javascript) and [WWW](https://www.ably-dev.com/docs/presence)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe(
      '[Apex](https://ably-dev.com/docs/channels.md) and [WWW](https://www.ably-dev.com/docs/presence.md)',
    );
  });

  it('should not modify links on other ably.com subdomains (sdk.ably.com)', () => {
    const input = '[API reference](https://sdk.ably.com/docs/builds/latest?lang=javascript)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[API reference](https://sdk.ably.com/docs/builds/latest?lang=javascript)');
  });

  // Tests for relative URLs (should not be processed)
  it('should not modify relative URLs', () => {
    const input = '[Link text](/docs/channels)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](/docs/channels)');
  });

  // Test for invalid URLs
  it('should not modify invalid URLs', () => {
    const input = '[Link text](not-a-valid-url)';
    const output = convertDocsLinksToMarkdown(input);
    expect(output).toBe('[Link text](not-a-valid-url)');
  });
});

describe('convertRelativeUrls', () => {
it('should convert relative URLs to absolute', () => {
const input = '[Link text](/docs/channels)';
Expand Down
70 changes: 68 additions & 2 deletions data/onPostBuild/transpileMdxToMarkdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,68 @@ function convertImagePathsToGitHub(content: string): string {
);
}

/**
 * Rewrite absolute Ably `/docs/` links so they point at the `.md` rendition of
 * the page and carry no query string (for example `?lang=javascript`).
 *
 * This is needed for LLM-friendly markdown files, where every docs link should
 * resolve to another markdown file.
 *
 * Converts:
 *   [text](https://ably.com/docs/channels?lang=javascript) → [text](https://ably.com/docs/channels.md)
 *   [text](https://ably.com/docs/channels/#section)        → [text](https://ably.com/docs/channels.md#section)
 *   [text](https://ably.com/docs/channels "Title")         → [text](https://ably.com/docs/channels.md "Title")
 *
 * Left untouched (returned exactly as written):
 *   - relative links and anything that is not http/https,
 *   - targets that `URL` cannot parse,
 *   - hosts other than ably.com, www.ably.com, ably-dev.com, www.ably-dev.com
 *     (so sdk.ably.com API links are preserved),
 *   - paths that do not start with `/docs/`,
 *   - paths that already end in `.md` (with or without trailing slashes),
 *   - paths that end in another file extension such as `.png`.
 *
 * @param content - Markdown text to scan for `[text](url)` links.
 * @returns The content with every eligible link target rewritten.
 */
function convertDocsLinksToMarkdown(content: string): string {
  // Allowed hostnames for docs link conversion (exact matches only)
  const ALLOWED_DOCS_HOSTNAMES = ['ably.com', 'www.ably.com', 'ably-dev.com', 'www.ably-dev.com'];

  // Splits `destination "title"` / `destination 'title'` so that the optional
  // markdown link title is never handed to the URL parser.
  const DESTINATION_WITH_TITLE = /^(\S+)(\s+(?:"[^"]*"|'[^']*'))\s*$/;

  // A 2-5 character alphanumeric suffix is treated as an existing file extension.
  const FILE_EXTENSION = /\.[a-zA-Z0-9]{2,5}$/;

  // Match markdown links: [text](url)
  return content.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match: string, linkText: string, url: string) => {
    // Only process absolute URLs with http/https
    if (!url.startsWith('http://') && !url.startsWith('https://')) {
      return match;
    }

    // Separate the link destination from an optional title. Parsing the title
    // as part of the URL would percent-encode it into the path.
    const titled = DESTINATION_WITH_TITLE.exec(url);
    const destination = titled?.[1] ?? url;
    const title = titled?.[2] ?? '';

    // Parse the URL to properly validate the host
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(destination);
    } catch {
      // Invalid URL, return as-is
      return match;
    }

    // Only process URLs from allowed Ably domains
    if (!ALLOWED_DOCS_HOSTNAMES.includes(parsedUrl.hostname)) {
      return match;
    }

    // Only process /docs/ paths
    if (!parsedUrl.pathname.startsWith('/docs/')) {
      return match;
    }

    // Normalize first: strip every trailing slash so the `.md` check below also
    // recognizes `/docs/page.md/` and `.md` is never appended after a slash.
    const normalizedPath = parsedUrl.pathname.replace(/\/+$/, '');

    // Don't process if already has .md extension
    if (normalizedPath.endsWith('.md')) {
      return match;
    }

    // Don't add .md if URL already has a file extension (e.g., .png, .jpg, .html, .pdf, etc.)
    // This prevents converting image/file URLs like test.png to test.png.md
    if (FILE_EXTENSION.test(parsedUrl.pathname)) {
      return match;
    }

    // Build the new URL with .md extension
    // Remove query parameters (including ?lang=) but preserve hash for semantic context
    const newUrl = `${parsedUrl.protocol}//${parsedUrl.host}${normalizedPath}.md${parsedUrl.hash}`;

    return `[${linkText}](${newUrl}${title})`;
  });
}

/**
* Convert relative URLs to absolute URLs using the main website domain
* Converts: [text](/docs/channels) → [text](https://ably.com/docs/channels)
Expand Down Expand Up @@ -422,10 +484,13 @@ function transformMdxToMarkdown(
// Stage 7: Convert relative URLs to absolute URLs
content = convertRelativeUrls(content, siteUrl);

// Stage 8: Replace template variables
// Stage 8: Convert /docs/ links to .md extension and remove ?lang= params
content = convertDocsLinksToMarkdown(content);

// Stage 9: Replace template variables
content = replaceTemplateVariables(content);

// Stage 9: Prepend title as markdown heading
// Stage 10: Prepend title as markdown heading
const finalContent = `# ${title}\n\n${intro ? `${intro}\n\n` : ''}${content}`;

return { content: finalContent, title, intro };
Expand Down Expand Up @@ -544,6 +609,7 @@ export {
removeAnchorTags,
removeJsxComments,
convertImagePathsToGitHub,
convertDocsLinksToMarkdown,
convertRelativeUrls,
replaceTemplateVariables,
calculateOutputPath,
Expand Down