Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ const features: Feature[] = [{
title: 'Smarter Counts',
description: 'Use optimized COUNT queries for API pagination when safe',
flag: 'smarterCounts'
}, {
title: 'LLMs.txt',
description: 'Serve llms.txt, per-entry markdown exports, and Accept: text/markdown content negotiation for AI and LLM tooling',
flag: 'llmsTxt'
}];

const AlphaFeatures: React.FC = () => {
Expand Down
67 changes: 67 additions & 0 deletions ghost/core/core/frontend/services/llms/handler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
const logging = require('@tryghost/logging');
const sentry = require('../../../shared/sentry');
const urlUtils = require('../../../shared/url-utils');

// Prefix prepended to the error message logged when serving an llms route fails.
const LLMS_LOG_KEY = '[llms]';

function createLlmsHandler({llmsService, config, settingsCache}) {
function handleDisabledLlmsRequest(req, res, next) {
if (settingsCache.get('is_private')) {
return next();
}

return res.redirect(302, urlUtils.urlFor({relativeUrl: '/'}));
}

function setLlmsHeaders(res) {
res.set({
'Cache-Control': `public, max-age=${config.get('caching:llms:maxAge')}`,
'Content-Type': 'text/plain; charset=utf-8'
});
}

async function serveLlms(req, res, next, format) {
try {
if (!llmsService.isEnabled()) {
return handleDisabledLlmsRequest(req, res, next);
}

const content = format === 'full'
? await llmsService.getLlmsFullTxt()
: await llmsService.getLlmsTxt();

if (!content) {
return next();
}

setLlmsHeaders(res);
return res.send(content);
} catch (err) {
const eventName = `llms.serve_${format}`;
const eventDetails = {route: req.path};

logging.error({
system: {event: eventName, ...eventDetails},
err
}, `${LLMS_LOG_KEY} ${err.message}`);

sentry.captureException(err, {
tags: {source: eventName},
extra: eventDetails
});

return next(err);
}
}

function mountLlmsRoutes(siteApp) {
siteApp.get('/llms.txt', (req, res, next) => serveLlms(req, res, next, 'index'));
siteApp.get('/llms-full.txt', (req, res, next) => serveLlms(req, res, next, 'full'));
siteApp.get('/.well-known/llms.txt', (req, res, next) => serveLlms(req, res, next, 'index'));
siteApp.get('/.well-known/llms-full.txt', (req, res, next) => serveLlms(req, res, next, 'full'));
}

return {mountLlmsRoutes};
}

module.exports = {createLlmsHandler};
180 changes: 180 additions & 0 deletions ghost/core/core/frontend/services/llms/markdown.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
const {NodeHtmlMarkdown} = require('node-html-markdown');
const htmlToPlaintext = require('@tryghost/html-to-plaintext');

// Default maximum length (compared against string .length) used by truncateDescription.
const MAX_DESCRIPTION_LENGTH = 300;

// Shared HTML-to-Markdown converter instance used by markdownFromHtml.
// Configured with "-" bullets, ``` code fences, *emphasis* and **strong** delimiters.
const nhm = new NodeHtmlMarkdown({
bulletMarker: '-',
codeFence: '```',
emDelimiter: '*',
strongDelimiter: '**'
});

/**
 * Replace every run of whitespace with a single space and trim both ends.
 *
 * @param {string} [value] - Text to normalise; any falsy value is treated as ''.
 * @returns {string} The normalised text.
 */
function collapseWhitespace(value) {
    const text = value || '';
    const singleSpaced = text.replace(/\s+/g, ' ');

    return singleSpaced.trim();
}

/**
 * Collapse whitespace in `value` and shorten it to at most `maxLength`
 * UTF-16 code units, ending with an ellipsis ("…") when text was cut off.
 *
 * @param {string} [value] - Text to shorten; it is first passed through collapseWhitespace.
 * @param {number} [maxLength=MAX_DESCRIPTION_LENGTH] - Maximum length of the result, ellipsis included.
 * @returns {string} The collapsed text, shortened when it exceeded `maxLength`.
 */
function truncateDescription(value, maxLength = MAX_DESCRIPTION_LENGTH) {
    const collapsed = collapseWhitespace(value);

    if (!collapsed || collapsed.length <= maxLength) {
        return collapsed;
    }

    // No room for even the ellipsis. Without this guard, slice() would get a
    // negative end index and return almost the whole string.
    if (maxLength <= 0) {
        return '';
    }

    // Reserve one code unit for the ellipsis.
    let cut = maxLength - 1;

    // Never cut between the two halves of a surrogate pair (e.g. an emoji):
    // if the last kept unit is a high surrogate, drop it as well.
    if (cut > 0) {
        const lastKept = collapsed.charCodeAt(cut - 1);

        if (lastKept >= 0xD800 && lastKept <= 0xDBFF) {
            cut -= 1;
        }
    }

    return `${collapsed.slice(0, cut).trimEnd()}…`;
}

/**
 * Map a resource path to the path of its markdown export.
 *
 * The root (or an empty/missing path) maps to '/index.md'. For any other
 * path a single trailing slash is dropped and '.md' is appended.
 *
 * @param {string} [pathname]
 * @returns {string}
 */
function getMarkdownPath(pathname) {
    const isRoot = !pathname || pathname === '/';

    if (isRoot) {
        return '/index.md';
    }

    let base = pathname;
    if (base.endsWith('/')) {
        base = base.slice(0, -1);
    }

    return base + '.md';
}

/**
 * Return the absolute URL of the markdown export for the given absolute URL.
 * Only the pathname is rewritten (via getMarkdownPath).
 *
 * @param {string} url - Absolute URL; `new URL()` throws if it cannot be parsed.
 * @returns {string}
 */
function getMarkdownUrl(url) {
    const target = new URL(url);
    const markdownPath = getMarkdownPath(target.pathname);

    target.pathname = markdownPath;

    return target.toString();
}

/**
 * Map a markdown export path back to the resource path it was derived from.
 *
 * @param {string} [pathname] - Request path, expected to end in '.md'.
 * @returns {string|null} '/' for '.md' or '/index.md'; otherwise the path
 *   without '.md' and with a trailing slash. null when the path is missing
 *   or does not end in '.md'.
 */
function getResourcePathFromMarkdownPath(pathname) {
    const suffix = '.md';

    if (!pathname || !pathname.endsWith(suffix)) {
        return null;
    }

    const base = pathname.slice(0, pathname.length - suffix.length);

    if (base === '' || base === '/index') {
        return '/';
    }

    if (base.endsWith('/')) {
        return base;
    }

    return `${base}/`;
}

function getAcceptedMarkdownContentType(req) {
const acceptHeader = (req.get('Accept') || '').toLowerCase();

if (!acceptHeader.includes('text/markdown') && !acceptHeader.includes('text/plain')) {
return null;
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

const preferredType = req.accepts(['text/markdown', 'text/plain', 'text/html']);

if (!preferredType || preferredType === 'text/html') {
return null;
}

return preferredType;
}

/**
 * Convert an HTML string to markdown using the shared `nhm` converter.
 *
 * @param {string} [html] - HTML to convert; falsy input is treated as ''.
 * @returns {string|null} Trimmed markdown with runs of three or more newlines
 *   reduced to two, or null when the conversion produced no text.
 */
function markdownFromHtml(html) {
    const translated = nhm.translate(html || '').trim();

    return translated
        ? translated.replace(/\n{3,}/g, '\n\n')
        : null;
}

/**
 * Format a date-like value as an ISO 8601 string.
 *
 * @param {string|number|Date} [value] - Anything accepted by `new Date()`.
 * @returns {string|null} The ISO string, or null when the value is falsy or
 *   does not parse to a valid date.
 */
function formatIsoDate(value) {
    if (!value) {
        return null;
    }

    const parsed = new Date(value);
    const isValid = !isNaN(parsed.getTime());

    return isValid ? parsed.toISOString() : null;
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/**
 * Pick the author name to show for an entry.
 *
 * @param {object} entry - Entry that may carry `primary_author` and/or `authors`.
 * @returns {string|null} `primary_author.name` when set, otherwise the name of
 *   the first item in `authors`, otherwise null.
 */
function getPrimaryAuthorName(entry) {
    const primaryName = entry.primary_author?.name;

    if (primaryName) {
        return primaryName;
    }

    const firstAuthorName = Array.isArray(entry.authors)
        ? entry.authors[0]?.name
        : undefined;

    return firstAuthorName || null;
}

/**
 * Collect the tag names of an entry.
 *
 * @param {object} entry - Entry that may carry `tags` and/or `primary_tag`.
 * @returns {string[]} The truthy names from a non-empty `tags` array;
 *   otherwise `[primary_tag.name]` when set; otherwise an empty array.
 */
function getTagNames(entry) {
    const hasTags = Array.isArray(entry.tags) && entry.tags.length > 0;

    if (hasTags) {
        // Keep only tags that actually have a name.
        return entry.tags.flatMap(tag => (tag.name ? [tag.name] : []));
    }

    const primaryTagName = entry.primary_tag?.name;

    return primaryTagName ? [primaryTagName] : [];
}

/**
 * Produce the body text for an entry's markdown export.
 *
 * Preference order: markdown converted from `entry.html`, then the
 * whitespace-collapsed `entry.plaintext`, then a whitespace-collapsed
 * plaintext excerpt of `entry.html`.
 *
 * @param {object} entry - Entry that may carry `html` and `plaintext`.
 * @returns {string} The body text (may be empty).
 */
function renderEntryMarkdownBody(entry) {
    const fromHtml = markdownFromHtml(entry.html);

    if (fromHtml) {
        return fromHtml;
    }

    const fallbackText = entry.plaintext
        ? entry.plaintext
        : htmlToPlaintext.excerpt(entry.html || '');

    return collapseWhitespace(fallbackText);
}

/**
 * Render a full markdown document for one entry: a quoted pointer to the
 * content index, the title as a level-one heading, a bullet list of the
 * metadata that is present, a blank line, and the body.
 *
 * @param {object} entry - Entry to render (title, url, type, dates, excerpt, authors, tags, html...).
 * @param {object} options
 * @param {string} options.llmsIndexUrl - URL written into the content-index pointer.
 * @returns {string} The markdown document.
 */
function renderEntryMarkdown(entry, {llmsIndexUrl}) {
    const publishedAt = formatIsoDate(entry.published_at);
    const updatedAt = formatIsoDate(entry.updated_at);
    const description = collapseWhitespace(entry.custom_excerpt);
    const authorName = getPrimaryAuthorName(entry);
    const tagNames = getTagNames(entry);

    // Metadata bullets, in display order; values that are absent are skipped.
    const metadata = [];
    const addBullet = (label, text) => {
        metadata.push(`- ${label}: ${text}`);
    };

    if (entry.url) {
        addBullet('URL', entry.url);
    }
    if (entry.type) {
        addBullet('Type', entry.type);
    }
    if (publishedAt) {
        addBullet('Published', publishedAt);
    }
    if (updatedAt) {
        addBullet('Updated', updatedAt);
    }
    if (description) {
        addBullet('Description', description);
    }
    if (authorName) {
        addBullet('Author', authorName);
    }
    if (tagNames.length) {
        addBullet('Tags', tagNames.join(', '));
    }

    const body = renderEntryMarkdownBody(entry) || '_No content available._';

    return [
        '> ## Content Index',
        `> Fetch the complete content index at: ${llmsIndexUrl}`,
        '> Use this file to discover other available public pages before exploring further.',
        '',
        `# ${entry.title || 'Untitled'}`,
        ...metadata,
        // Blank line between the heading/metadata and the body.
        '',
        body
    ].join('\n');
}

// Public API of this module: the description-length constant plus the
// path/URL, content-negotiation, and markdown-rendering helpers defined above.
module.exports = {
MAX_DESCRIPTION_LENGTH,
collapseWhitespace,
formatIsoDate,
getAcceptedMarkdownContentType,
getMarkdownPath,
getMarkdownUrl,
getPrimaryAuthorName,
getTagNames,
getResourcePathFromMarkdownPath,
markdownFromHtml,
renderEntryMarkdown,
renderEntryMarkdownBody,
truncateDescription
};
Loading
Loading