feat(agent-ready): add markdown content negotiation + Link discovery headers

carryologist · carryologist · commit eb51e8585fed · 2026-05-11T22:29:57.000Z
Two fixes targeting findings from Cloudflare's isitagentready.com audit
that our own AEO audit didn't surface:

1. Markdown content negotiation. Requests to /posts/<slug> carrying
   Accept: text/markdown are rewritten by middleware to a new route
   /posts/<slug>/raw which serves the raw MDX with
   Content-Type: text/markdown. Browsers send Accept: text/html and
   continue to receive the HTML page unchanged. The /raw route sets
   Vary: Accept so shared caches keep the two representations separate.
   Includes a q-value-aware Accept parser so the default browser header
   pattern (text/html,...;q=0.9,*/*;q=0.8) doesn't accidentally match.

2. Link response headers (RFC 8288). next.config.ts now emits a Link
   header advertising agent-useful resources on every response:
     - </llms.txt>; rel="describedby"
     - </llms-full.txt>; rel="alternate"
     - </feed.xml>; rel="alternate"
     - </sitemap.xml>; rel="sitemap"
   These coexist with the preload Link headers Next.js auto-injects
   for fonts/CSS — multiple Link header values are valid per RFC 8288.
diff --git a/next.config.ts b/next.config.ts
@@ -23,6 +23,26 @@ import type { NextConfig } from "next";
 // 'unsafe-inline' on script-src is required by the JSON-LD <script>
 // blocks rendered via dangerouslySetInnerHTML. Phase 4 replaces this
 // with a per-request nonce so we can drop 'unsafe-inline' entirely.
+
+// Link response headers (RFC 8288) advertising agent-discoverable
+// resources from the site root. We send these on every response — the
+// Cloudflare agent-readiness probe checks the homepage, and the values
+// are equally true everywhere else.
+//
+// Relation types:
+//   describedby   Pointer to the site description in agent-readable form (llms.txt)
+//   alternate     Alternate representations (llms-full.txt, RSS feed)
+//   sitemap       The XML sitemap
+//
+// Next.js prepends its own preload Link headers for fonts/CSS. Multiple
+// Link headers on one response are valid per RFC 8288 §3.
+const LINK_HEADER = [
+  '</llms.txt>; rel="describedby"; type="text/plain"',
+  '</llms-full.txt>; rel="alternate"; type="text/plain"; title="Full content for LLMs"',
+  '</feed.xml>; rel="alternate"; type="application/rss+xml"; title="RSS feed"',
+  '</sitemap.xml>; rel="sitemap"; type="application/xml"',
+].join(", ");
+
 const CSP_DIRECTIVES = [
   "default-src 'self'",
   "script-src 'self' 'unsafe-inline' https://va.vercel-scripts.com",
@@ -75,6 +95,10 @@ const nextConfig: NextConfig = {
             key: "Content-Security-Policy-Report-Only",
             value: CSP_DIRECTIVES,
           },
+          {
+            key: "Link",
+            value: LINK_HEADER,
+          },
         ],
       },
     ];
diff --git a/src/app/posts/[slug]/raw/route.ts b/src/app/posts/[slug]/raw/route.ts
@@ -0,0 +1,66 @@
+import { NextRequest, NextResponse } from "next/server";
+import { getPostBySlug } from "@/lib/posts";
+
+/**
+ * Markdown content negotiation endpoint.
+ *
+ * Two ways to reach this route:
+ *
+ *  1. Direct GET to /posts/<slug>/raw — always returns text/markdown.
+ *     Useful for tooling that wants a deterministic URL.
+ *
+ *  2. Internal rewrite from /posts/<slug> when the client sends
+ *     Accept: text/markdown (handled in src/middleware.ts).
+ *     Browsers and ordinary crawlers send Accept: text/html and
+ *     continue to receive the HTML page unchanged.
+ *
+ * The body is the raw MDX (frontmatter stripped) preceded by a markdown
+ * preamble (title, description, metadata list) so agents can recover
+ * title/date/description without parsing MDX components. Custom JSX
+ * components (PhoneScreenshot, etc.) are left as-is; they are inert for an LLM.
+ *
+ * @returns 200 text/markdown for a known slug, otherwise 404 text/plain.
+ */
+export async function GET(
+  _request: NextRequest,
+  { params }: { params: Promise<{ slug: string }> },
+) {
+  const { slug } = await params;
+  const post = getPostBySlug(slug);
+
+  if (!post) {
+    return new NextResponse("Not found", {
+      status: 404,
+      // Vary also on the miss: a rewritten /posts/<slug> request can land here.
+      headers: { "Content-Type": "text/plain; charset=utf-8", Vary: "Accept" },
+    });
+  }
+
+  // Optional lines are left out entirely rather than emitted empty; the
+  // blank separators are deliberate and must survive into the output.
+  const preamble = [
+    `# ${post.title}`,
+    "",
+    ...(post.description ? [`> ${post.description}`, ""] : []),
+    `- Author: Rob Whiteley`,
+    `- Published: ${post.date}`,
+    `- Canonical: https://vibescoder.dev/posts/${post.slug}`,
+    ...(post.tags?.length ? [`- Tags: ${post.tags.join(", ")}`] : []),
+    "",
+    "---",
+  ].join("\n");
+
+  const body = `${preamble}\n\n${post.content.trim()}\n`;
+
+  return new NextResponse(body, {
+    status: 200,
+    headers: {
+      "Content-Type": "text/markdown; charset=utf-8",
+      "Cache-Control": "public, max-age=300, s-maxage=3600",
+      // Tell shared caches that the representation depends on Accept
+      // so the rewrite from /posts/<slug> doesn't poison the HTML cache.
+      Vary: "Accept",
+      "X-Robots-Tag": "index, follow",
+    },
+  });
+}
diff --git a/src/middleware.ts b/src/middleware.ts
@@ -10,6 +10,28 @@ function getSecret() {
 export async function middleware(request: NextRequest) {
   const { pathname } = request.nextUrl;
 
+  // ---------------------------------------------------------------------
+  // Markdown content negotiation
+  //
+  // If an agent requests a /posts/<slug> URL with Accept: text/markdown,
+  // rewrite to /posts/<slug>/raw which returns the raw MDX with
+  // Content-Type: text/markdown. Browsers send Accept: text/html and
+  // continue to receive the HTML page unchanged.
+  //
+  // The /raw route also sets Vary: Accept so shared caches keep the two
+  // representations separate.
+  // ---------------------------------------------------------------------
+  if (pathname.startsWith("/posts/") && !pathname.endsWith("/raw")) {
+    const accept = request.headers.get("accept") ?? "";
+    if (prefersMarkdown(accept)) {
+      const url = request.nextUrl.clone();
+      url.pathname = pathname.replace(/\/?$/, "") + "/raw";
+      const res = NextResponse.rewrite(url);
+      res.headers.set("Vary", "Accept");
+      return res;
+    }
+  }
+
   // Allow login page and auth API routes through.
   if (
     pathname === "/admin/login" ||
@@ -50,6 +72,30 @@ export async function middleware(request: NextRequest) {
   }
 }
 
+/**
+ * True when Accept names text/markdown with q > 0 and rates it at least as
+ * high as text/html (case-insensitive; text/html falls back to text/* then
+ * *\/* if unlisted). The browser default "text/html,...,*\/*;q=0.8" gets HTML.
+ */
+function prefersMarkdown(accept: string): boolean {
+  // Highest valid q seen per media range; ranges never listed stay absent.
+  const best = new Map<string, number>();
+  for (const part of accept.toLowerCase().split(",")) {
+    const [range, ...params] = part.split(";").map((s) => s.trim());
+    if (!range) continue;
+    const qParam = params.find((p) => p.startsWith("q="));
+    const q = qParam ? parseFloat(qParam.slice(2)) : 1;
+    // Skip malformed weights (NaN, negative) and cap at the RFC maximum of 1.
+    if (q >= 0) best.set(range, Math.max(best.get(range) ?? 0, Math.min(q, 1)));
+  }
+  const markdownQ = best.get("text/markdown") ?? 0;
+  const htmlQ = best.get("text/html") ?? best.get("text/*") ?? best.get("*/*") ?? 0;
+  return markdownQ > 0 && markdownQ >= htmlQ;
+}
+
 export const config = {
-  matcher: ["/admin/:path*", "/api/:path*"],
+  // Matchers must be static string literals (Next.js validates them at build
+  // time). /posts/:path* enables the markdown negotiation rewrite. NOTE(review):
+  // middleware now also runs for /posts/* — confirm the auth guard lets it through.
+  matcher: ["/admin/:path*", "/api/:path*", "/posts/:path*"],
 };