From 1898c97b88f088a4b6677f0779e459f85aca6300 Mon Sep 17 00:00:00 2001 From: Feitong Yang Date: Thu, 14 May 2026 15:21:45 -0700 Subject: [PATCH] =?UTF-8?q?feat(seo):=20GEO=20foundations=20=E2=80=94=20ll?= =?UTF-8?q?ms.txt,=20raw=20.md,=20AI=20bot=20allowlist,=20JSON-LD=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make essays first-class citizens for AI answer engines (Claude, ChatGPT, Perplexity, Gemini) and IDE agents. - BlogPosting JSON-LD now emits `dateModified` and `image`. Previously both were silently dropped: `dateModified` because `EssayMeta` had no `updated` field, `image` because `essayPostingSchema` didn't pass it through. Adds `updated?: string` to EssayMeta/PeriodicMeta and both frontmatter validators so essays can declare a last-modified date. - robots.ts names 15 AI/answer-engine crawlers explicitly (GPTBot, OAI-SearchBot, ChatGPT-User, ClaudeBot, Claude-User, Claude-SearchBot, PerplexityBot, Perplexity-User, Google-Extended, Applebot-Extended, CCBot, Bytespider, Meta-ExternalAgent, cohere-ai, DuckAssistBot). A blanket `*` allow covers them in theory, but some default to "no matching rule = stay out". - app/icon.svg adds a favicon (file-convention; Next emits the link tags). - app/llms.txt/route.ts serves a build-time-generated llms.txt listing all published essays (en + zh), periodics, and series. Each essay links to its raw .md URL. - app/(en|zh)/essays/[slug]/raw.md/route.ts serves clean markdown per essay at /essays/<slug>/raw.md. LLM crawlers preferentially ingest markdown over rendered HTML (cheaper tokens, fewer parsing errors). Both essay pages now announce this via `<link rel="alternate" type="text/markdown">`. Verified with Google's Rich Results Test: BlogPosting + BreadcrumbList detected and valid on essay pages. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/blog/app/(en)/essays/[slug]/page.tsx | 3 + .../app/(en)/essays/[slug]/raw.md/route.ts | 52 +++++++++++ apps/blog/app/(zh)/zh/essays/[slug]/page.tsx | 3 + .../app/(zh)/zh/essays/[slug]/raw.md/route.ts | 49 +++++++++++ apps/blog/app/icon.svg | 4 + apps/blog/app/llms.txt/route.ts | 88 +++++++++++++++++++ apps/blog/app/robots.ts | 35 +++++++- apps/blog/lib/jsonld.ts | 4 + apps/blog/types/content.ts | 14 +++ 9 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 apps/blog/app/(en)/essays/[slug]/raw.md/route.ts create mode 100644 apps/blog/app/(zh)/zh/essays/[slug]/raw.md/route.ts create mode 100644 apps/blog/app/icon.svg create mode 100644 apps/blog/app/llms.txt/route.ts diff --git a/apps/blog/app/(en)/essays/[slug]/page.tsx b/apps/blog/app/(en)/essays/[slug]/page.tsx index 06c9a62..ca5ec39 100644 --- a/apps/blog/app/(en)/essays/[slug]/page.tsx +++ b/apps/blog/app/(en)/essays/[slug]/page.tsx @@ -48,6 +48,9 @@ export async function generateMetadata({ en: `/essays/${slug}`, 'x-default': `/essays/${slug}`, }, + types: { + 'text/markdown': `/essays/${slug}/raw.md`, + }, }; const zhTranslation = getTranslation(slug, 'zh'); diff --git a/apps/blog/app/(en)/essays/[slug]/raw.md/route.ts b/apps/blog/app/(en)/essays/[slug]/raw.md/route.ts new file mode 100644 index 0000000..56361f1 --- /dev/null +++ b/apps/blog/app/(en)/essays/[slug]/raw.md/route.ts @@ -0,0 +1,52 @@ +import { getEssayBySlug, getEssaySlugsByLanguage } from '@/lib/essays'; +import { SITE_AUTHOR, SITE_URL } from '@/lib/constants'; + +export const dynamic = 'force-static'; + +export async function generateStaticParams() { + return getEssaySlugsByLanguage('en').map((slug) => ({ slug })); +} + +interface Params { + params: Promise<{ slug: string }>; +} + +// Serves a clean, citable markdown rendition of the essay. LLM crawlers and +// IDE agents prefer this over the rendered HTML — fewer tokens, no parser +// surprises, stable structure across visits. 
+export async function GET(_req: Request, { params }: Params) { + const { slug } = await params; + const essay = getEssayBySlug(slug); + + if (!essay || essay.lang !== 'en') { + return new Response('Not Found', { status: 404 }); + } + + const canonical = `${SITE_URL}/essays/${slug}`; + const header = [ + `# ${essay.title}`, + '', + `*${essay.description}*`, + '', + `- Author: ${SITE_AUTHOR.name} (${SITE_AUTHOR.url})`, + `- Published: ${essay.date}`, + essay.updated ? `- Updated: ${essay.updated}` : null, + `- Canonical: ${canonical}`, + essay.topics.length > 0 ? `- Topics: ${essay.topics.join(', ')}` : null, + '', + '---', + '', + ] + .filter((line): line is string => line !== null) + .join('\n'); + + const body = `${header}${essay.content.trimStart()}\n`; + + return new Response(body, { + headers: { + 'Content-Type': 'text/markdown; charset=utf-8', + 'Cache-Control': 'public, max-age=3600', + Link: `<${canonical}>; rel="canonical"`, + }, + }); +} diff --git a/apps/blog/app/(zh)/zh/essays/[slug]/page.tsx b/apps/blog/app/(zh)/zh/essays/[slug]/page.tsx index ccd3a61..4e37d1b 100644 --- a/apps/blog/app/(zh)/zh/essays/[slug]/page.tsx +++ b/apps/blog/app/(zh)/zh/essays/[slug]/page.tsx @@ -45,6 +45,9 @@ export async function generateMetadata({ languages: { zh: `/zh/essays/${slug}`, }, + types: { + 'text/markdown': `/zh/essays/${slug}/raw.md`, + }, }; const enTranslation = getTranslation(slug, 'en'); diff --git a/apps/blog/app/(zh)/zh/essays/[slug]/raw.md/route.ts b/apps/blog/app/(zh)/zh/essays/[slug]/raw.md/route.ts new file mode 100644 index 0000000..896eb87 --- /dev/null +++ b/apps/blog/app/(zh)/zh/essays/[slug]/raw.md/route.ts @@ -0,0 +1,49 @@ +import { getEssayBySlug, getEssaySlugsByLanguage } from '@/lib/essays'; +import { SITE_AUTHOR, SITE_URL } from '@/lib/constants'; + +export const dynamic = 'force-static'; + +export async function generateStaticParams() { + return getEssaySlugsByLanguage('zh').map((slug) => ({ slug })); +} + +interface Params { + params: 
Promise<{ slug: string }>; +} + +export async function GET(_req: Request, { params }: Params) { + const { slug } = await params; + const essay = getEssayBySlug(slug); + + if (!essay || essay.lang !== 'zh') { + return new Response('Not Found', { status: 404 }); + } + + const canonical = `${SITE_URL}/zh/essays/${slug}`; + const header = [ + `# ${essay.title}`, + '', + `*${essay.description}*`, + '', + `- 作者: ${SITE_AUTHOR.name} (${SITE_AUTHOR.url})`, + `- 发布日期: ${essay.date}`, + essay.updated ? `- 更新日期: ${essay.updated}` : null, + `- 原文链接: ${canonical}`, + essay.topics.length > 0 ? `- 主题: ${essay.topics.join(', ')}` : null, + '', + '---', + '', + ] + .filter((line): line is string => line !== null) + .join('\n'); + + const body = `${header}${essay.content.trimStart()}\n`; + + return new Response(body, { + headers: { + 'Content-Type': 'text/markdown; charset=utf-8', + 'Cache-Control': 'public, max-age=3600', + Link: `<${canonical}>; rel="canonical"`, + }, + }); +} diff --git a/apps/blog/app/icon.svg b/apps/blog/app/icon.svg new file mode 100644 index 0000000..7713463 --- /dev/null +++ b/apps/blog/app/icon.svg @@ -0,0 +1,4 @@ + + + A + diff --git a/apps/blog/app/llms.txt/route.ts b/apps/blog/app/llms.txt/route.ts new file mode 100644 index 0000000..dab103b --- /dev/null +++ b/apps/blog/app/llms.txt/route.ts @@ -0,0 +1,88 @@ +import { SITE_URL } from '@/lib/constants'; +import { getAllEssays } from '@/lib/essays'; +import { getAllPeriodics } from '@/lib/periodics'; +import { getAllSeries } from '@/lib/series'; + +export const dynamic = 'force-static'; + +interface Entry { + title: string; + description?: string; + url: string; + markdownUrl?: string; +} + +function bullet({ title, description, url, markdownUrl }: Entry): string { + const link = markdownUrl ?? url; + const desc = description?.trim(); + return desc ? 
`- [${title}](${link}): ${desc}` : `- [${title}](${link})`; +} + +function section(heading: string, entries: Entry[]): string { + if (entries.length === 0) return ''; + return `## ${heading}\n\n${entries.map(bullet).join('\n')}`; +} + +function renderLlmsTxt(): string { + const essaysEn: Entry[] = getAllEssays({ language: 'en' }).map((e) => ({ + title: e.title, + description: e.description, + url: `${SITE_URL}/essays/${e.slug}`, + markdownUrl: `${SITE_URL}/essays/${e.slug}/raw.md`, + })); + + const essaysZh: Entry[] = getAllEssays({ language: 'zh' }).map((e) => ({ + title: e.title, + description: e.description, + url: `${SITE_URL}/zh/essays/${e.slug}`, + markdownUrl: `${SITE_URL}/zh/essays/${e.slug}/raw.md`, + })); + + const periodics: Entry[] = getAllPeriodics().map((p) => ({ + title: p.title, + description: p.description, + url: `${SITE_URL}${p.lang === 'zh' ? '/zh' : ''}/periodics/${p.slug}`, + })); + + const series: Entry[] = getAllSeries().map((s) => ({ + title: s.title, + description: s.description, + url: `${SITE_URL}${s.lang === 'zh' ? '/zh' : ''}/series/${s.slug}`, + })); + + const blocks = [ + '# Algo Mind — Feitong Yang', + '', + '> Essays on AI, software engineering, product thinking, and career — by Feitong Yang.', + '', + 'Algo Mind is a personal blog. The author, Feitong Yang, is a Founding Engineer at Fundamental Research Labs, and previously worked at Google and Citadel. Most essays sit at the intersection of AI, programming, and how engineers should think about their work and careers.', + '', + 'Each essay is also available as raw markdown at `/essays//raw.md` (English) and `/zh/essays//raw.md` (Chinese). 
Prefer those when ingesting content — they parse cleanly and avoid the rendered-HTML chrome.', + '', + section('Essays (English)', essaysEn), + '', + section('Essays (Chinese)', essaysZh), + '', + section('Periodics', periodics), + '', + section('Series', series), + '', + '## Optional', + '', + `- [Sitemap](${SITE_URL}/sitemap.xml)`, + `- [Atom feed (English)](${SITE_URL}/feed.xml)`, + `- [Atom feed (Chinese)](${SITE_URL}/zh/feed.xml)`, + '', + ]; + + return blocks.filter((block) => block !== '').join('\n') + '\n'; +} + +export function GET() { + return new Response(renderLlmsTxt(), { + headers: { + 'Content-Type': 'text/plain; charset=utf-8', + 'Cache-Control': 'public, max-age=3600', + }, + }); +} diff --git a/apps/blog/app/robots.ts b/apps/blog/app/robots.ts index d3efe4a..5a74923 100644 --- a/apps/blog/app/robots.ts +++ b/apps/blog/app/robots.ts @@ -1,9 +1,42 @@ import type { MetadataRoute } from 'next'; import { SITE_URL } from '@/lib/constants'; +// Explicit allow-list for AI / answer-engine crawlers. A blanket `*` allow +// also covers them in theory, but several of these bots default to "no +// matching rule = stay out". Naming them removes the ambiguity. To opt +// out of training-data ingestion while staying indexed for search, flip +// the training bots (GPTBot, ClaudeBot, CCBot, Google-Extended, +// Applebot-Extended, Bytespider, Meta-ExternalAgent, cohere-ai) to +// `disallow: '/'` and keep the *-SearchBot / *-User agents on allow. 
+const AI_BOTS = [ + // OpenAI + 'GPTBot', + 'OAI-SearchBot', + 'ChatGPT-User', + // Anthropic + 'ClaudeBot', + 'Claude-User', + 'Claude-SearchBot', + // Perplexity + 'PerplexityBot', + 'Perplexity-User', + // Google / Apple + 'Google-Extended', + 'Applebot-Extended', + // Common Crawl / others + 'CCBot', + 'Bytespider', + 'Meta-ExternalAgent', + 'cohere-ai', + 'DuckAssistBot', +]; + export default function robots(): MetadataRoute.Robots { return { - rules: [{ userAgent: '*', allow: '/' }], + rules: [ + { userAgent: '*', allow: '/' }, + ...AI_BOTS.map((userAgent) => ({ userAgent, allow: '/' })), + ], sitemap: `${SITE_URL}/sitemap.xml`, host: SITE_URL, }; diff --git a/apps/blog/lib/jsonld.ts b/apps/blog/lib/jsonld.ts index f56ddbc..98ff58d 100644 --- a/apps/blog/lib/jsonld.ts +++ b/apps/blog/lib/jsonld.ts @@ -118,6 +118,8 @@ export function essayPostingSchema(essay: EssayMeta, urlPath: string) { description: essay.description, url: urlPath, datePublished: essay.date, + dateModified: essay.updated, + image: essay.image, topics: essay.topics as string[], locale: essay.lang, }); @@ -133,6 +135,8 @@ export function periodicPostingSchema(periodic: PeriodicMeta, urlPath: string) { : `${periodic.title} - Issue #${periodic.issue}`), url: urlPath, datePublished: periodic.date, + dateModified: periodic.updated, + image: periodic.image, topics: periodic.topics as string[], locale: periodic.lang, }); diff --git a/apps/blog/types/content.ts b/apps/blog/types/content.ts index c91fd73..4d56cdb 100644 --- a/apps/blog/types/content.ts +++ b/apps/blog/types/content.ts @@ -69,6 +69,8 @@ export interface EssayMeta { description: string; /** Publication date in ISO format (YYYY-MM-DD) */ date: string; + /** Last updated date in ISO format (YYYY-MM-DD). Surfaces as `dateModified` in JSON-LD and as `lastmod` in the sitemap. 
*/ + updated?: string; /** Essay type (how it's written) */ type: EssayType; /** Topics covered (can be multiple) */ @@ -130,6 +132,8 @@ export interface PeriodicMeta { description?: string; /** Publication date in ISO format (YYYY-MM-DD) */ date: string; + /** Last updated date in ISO format (YYYY-MM-DD). Surfaces as `dateModified` in JSON-LD and as `lastmod` in the sitemap. */ + updated?: string; /** Issue number (for sequential content) */ issue: number; /** Periodic type (digest, changelog, notes) */ @@ -173,6 +177,7 @@ export interface PeriodicFrontmatter { title: string; description?: string; date: string; + updated?: string; issue: number; type: PeriodicType; topics: Topic[]; @@ -277,6 +282,7 @@ export interface Frontmatter { title: string; description: string; date: string; + updated?: string; type: EssayType; topics: Topic[]; lang: Language; @@ -344,6 +350,9 @@ export function validateFrontmatter(data: Record): Frontmatter if (typeof data.date !== 'string' || !data.date) { errors.push('date is required and must be a string'); } + if (data.updated !== undefined && typeof data.updated !== 'string') { + errors.push('updated must be a string'); + } if (!isValidEssayType(data.type)) { errors.push(`type must be one of: guide, deep-dive, opinion, review, narrative`); } @@ -380,6 +389,7 @@ export function validateFrontmatter(data: Record): Frontmatter title: data.title as string, description: (data.description as string | undefined) ?? '', date: data.date as string, + updated: data.updated as string | undefined, type: data.type as EssayType, topics: data.topics as Topic[], lang: (data.lang as Language) ?? 
'en', @@ -417,6 +427,9 @@ export function validatePeriodicFrontmatter(data: Record): Peri if (data.description !== undefined && typeof data.description !== 'string') { errors.push('description must be a string'); } + if (data.updated !== undefined && typeof data.updated !== 'string') { + errors.push('updated must be a string'); + } if (data.lang !== undefined && !isValidLanguage(data.lang)) { errors.push(`lang must be one of: en, zh`); } @@ -438,6 +451,7 @@ export function validatePeriodicFrontmatter(data: Record): Peri title: data.title as string, description: data.description as string | undefined, date: data.date as string, + updated: data.updated as string | undefined, issue: data.issue as number, type: data.type as PeriodicType, topics: data.topics as Topic[],