Skip to content

Commit eb51e85

Browse files
committed
feat(agent-ready): add markdown content negotiation + Link discovery headers
Two fixes targeting findings from Cloudflare's isitagentready.com audit that our own AEO audit didn't surface:

1. Markdown content negotiation. Requests to /posts/<slug> carrying Accept: text/markdown are rewritten by middleware to a new route /posts/<slug>/raw which serves the raw MDX with Content-Type: text/markdown. Browsers send Accept: text/html and continue to receive the HTML page unchanged. The /raw route sets Vary: Accept so shared caches keep the two representations separate. Includes a q-value-aware Accept parser so the default browser header pattern (text/html,...;q=0.9,*/*;q=0.8) doesn't accidentally match.

2. Link response headers (RFC 8288). next.config.ts now emits a Link header advertising agent-useful resources from every response:
   - </llms.txt>; rel="describedby"
   - </llms-full.txt>; rel="alternate"
   - </feed.xml>; rel="alternate"
   - </sitemap.xml>; rel="sitemap"

These coexist with the preload Link headers Next.js auto-injects for fonts/CSS — multiple Link header values are valid per RFC 8288.
1 parent 01f2118 commit eb51e85

3 files changed

Lines changed: 137 additions & 1 deletion

File tree

next.config.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,26 @@ import type { NextConfig } from "next";
2323
// 'unsafe-inline' on script-src is required by the JSON-LD <script>
2424
// blocks rendered via dangerouslySetInnerHTML. Phase 4 replaces this
2525
// with a per-request nonce so we can drop 'unsafe-inline' entirely.
26+
27+
// Value of the `Link` response header (RFC 8288) that advertises
// agent-discoverable resources, all addressed from the site root. The
// same value is attached to every response: the Cloudflare
// agent-readiness probe inspects the homepage, and the links hold for
// any other page as well.
//
// Relation types used below:
//   describedby  Site description in agent-readable form (llms.txt)
//   alternate    Alternate representations (llms-full.txt, RSS feed)
//   sitemap      The XML sitemap
//
// Next.js adds its own preload Link headers for fonts/CSS; several Link
// values on one response are valid per RFC 8288 §3.
const LINK_HEADER = (
  [
    ["/llms.txt", 'rel="describedby"; type="text/plain"'],
    [
      "/llms-full.txt",
      'rel="alternate"; type="text/plain"; title="Full content for LLMs"',
    ],
    [
      "/feed.xml",
      'rel="alternate"; type="application/rss+xml"; title="RSS feed"',
    ],
    ["/sitemap.xml", 'rel="sitemap"; type="application/xml"'],
  ] as const
)
  // Each entry becomes one link-value: `<target>; param; param…`.
  .map(([target, attributes]) => `<${target}>; ${attributes}`)
  .join(", ");
45+
2646
const CSP_DIRECTIVES = [
2747
"default-src 'self'",
2848
"script-src 'self' 'unsafe-inline' https://va.vercel-scripts.com",
@@ -75,6 +95,10 @@ const nextConfig: NextConfig = {
7595
key: "Content-Security-Policy-Report-Only",
7696
value: CSP_DIRECTIVES,
7797
},
98+
{
99+
key: "Link",
100+
value: LINK_HEADER,
101+
},
78102
],
79103
},
80104
];

src/app/posts/[slug]/raw/route.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import { NextRequest, NextResponse } from "next/server";
2+
import { getPostBySlug } from "@/lib/posts";
3+
4+
/**
 * Markdown content negotiation endpoint.
 *
 * Two ways to reach this route:
 *
 *   1. Direct GET to /posts/<slug>/raw — always returns text/markdown.
 *      Useful for tooling that wants a deterministic URL.
 *
 *   2. Internal rewrite from /posts/<slug> when the client sends
 *      Accept: text/markdown (handled in src/middleware.ts).
 *      Browsers and ordinary crawlers send Accept: text/html and
 *      continue to receive the HTML page unchanged.
 *
 * The response body is a short Markdown preamble followed by the post
 * content as returned by getPostBySlug:
 *
 *   # <title>
 *
 *   > <description>            (omitted when the post has none)
 *
 *   - Author / Published / Canonical / Tags   (Tags omitted when empty)
 *
 *   ---
 *
 *   <post content>
 *
 * The preamble lets agents recover title/date/description without parsing
 * MDX. Custom JSX components in the content (PhoneScreenshot, etc.) are
 * passed through untouched.
 *
 * @param _request Incoming request; unused, the slug comes from `params`.
 * @param params   Route params promise carrying the post `slug`.
 * @returns 200 text/markdown for a known slug, otherwise 404 text/plain.
 */
export async function GET(
  _request: NextRequest,
  { params }: { params: Promise<{ slug: string }> },
) {
  const { slug } = await params;
  const post = getPostBySlug(slug);

  if (!post) {
    return new NextResponse("Not found", {
      status: 404,
      headers: { "Content-Type": "text/plain; charset=utf-8" },
    });
  }

  // Metadata bullets form one tight list, so they are joined with single
  // newlines. Tags are appended only when the post actually has some.
  const metadata = [
    `- Author: Rob Whiteley`,
    `- Published: ${post.date}`,
    `- Canonical: https://vibescoder.dev/posts/${post.slug}`,
  ];
  if (post.tags?.length) {
    metadata.push(`- Tags: ${post.tags.join(", ")}`);
  }

  // Top-level sections are separated by a blank line. Optional sections
  // are skipped here rather than emitted empty and filtered afterwards:
  // filtering on "" would also remove the intended blank separators.
  const sections = [`# ${post.title}`];
  if (post.description) {
    sections.push(`> ${post.description}`);
  }
  sections.push(metadata.join("\n"), "---", post.content.trim());

  const body = `${sections.join("\n\n")}\n`;

  return new NextResponse(body, {
    status: 200,
    headers: {
      "Content-Type": "text/markdown; charset=utf-8",
      "Cache-Control": "public, max-age=300, s-maxage=3600",
      // Tell shared caches that the representation depends on Accept
      // so the rewrite from /posts/<slug> doesn't poison the HTML cache.
      Vary: "Accept",
      "X-Robots-Tag": "index, follow",
    },
  });
}

src/middleware.ts

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,28 @@ function getSecret() {
1010
export async function middleware(request: NextRequest) {
1111
const { pathname } = request.nextUrl;
1212

13+
// ---------------------------------------------------------------------
14+
// Markdown content negotiation
15+
//
16+
// If an agent requests a /posts/<slug> URL with Accept: text/markdown,
17+
// rewrite to /posts/<slug>/raw which returns the raw MDX with
18+
// Content-Type: text/markdown. Browsers send Accept: text/html and
19+
// continue to receive the HTML page unchanged.
20+
//
21+
// The /raw route also sets Vary: Accept so shared caches keep the two
22+
// representations separate.
23+
// ---------------------------------------------------------------------
24+
if (pathname.startsWith("/posts/") && !pathname.endsWith("/raw")) {
25+
const accept = request.headers.get("accept") ?? "";
26+
if (prefersMarkdown(accept)) {
27+
const url = request.nextUrl.clone();
28+
url.pathname = pathname.replace(/\/?$/, "") + "/raw";
29+
const res = NextResponse.rewrite(url);
30+
res.headers.set("Vary", "Accept");
31+
return res;
32+
}
33+
}
34+
1335
// Allow login page and auth API routes through.
1436
if (
1537
pathname === "/admin/login" ||
@@ -50,6 +72,30 @@ export async function middleware(request: NextRequest) {
5072
}
5173
}
5274

75+
/**
 * Decides whether an Accept header asks for Markdown rather than HTML.
 *
 * Returns true only when `text/markdown` is listed explicitly with a
 * non-zero quality AND that quality is at least the quality the header
 * gives to `text/html`. Ties go to Markdown: a client that names
 * text/markdown at all is asking for it. Wildcards never count as a
 * request for Markdown, so the default browser pattern
 * "text/html,...;q=0.9,*\/*;q=0.8" keeps getting HTML.
 *
 * The quality of text/html is taken from the most specific range that
 * matches it (RFC 9110 §12.5.1): `text/html` first, then `text/*`, then
 * the catch-all range. When none is present HTML counts as q=0.
 *
 * Parsing notes:
 *   - Media types and the `q` parameter name are case-insensitive.
 *   - q-values are clamped to [0, 1].
 *   - An entry whose q-value is not a number is ignored entirely, so one
 *     malformed entry cannot affect the others.
 *
 * @param accept Raw Accept header value; "" when the header is absent.
 * @returns true when the Markdown representation should be served.
 */
function prefersMarkdown(accept: string): boolean {
  if (!accept) return false;

  let markdownQ = 0;
  // One slot per specificity level that can match text/html; undefined
  // means "no such range appeared in the header".
  let htmlExactQ: number | undefined;
  let htmlSubtypeWildcardQ: number | undefined;
  let htmlAnyQ: number | undefined;

  for (const part of accept.split(",")) {
    const [rawType = "", ...params] = part.split(";").map((s) => s.trim());
    const mediaType = rawType.toLowerCase();
    if (!mediaType) continue;

    const qParam = params.find((p) => p.toLowerCase().startsWith("q="));
    let q = 1; // RFC 9110: a missing weight means q=1.
    if (qParam !== undefined) {
      const parsed = Number.parseFloat(qParam.slice(2));
      if (Number.isNaN(parsed)) continue; // malformed weight: drop the entry
      q = Math.min(1, Math.max(0, parsed));
    }

    // A range repeated at the same specificity keeps its highest weight.
    switch (mediaType) {
      case "text/markdown":
        markdownQ = Math.max(markdownQ, q);
        break;
      case "text/html":
        htmlExactQ = Math.max(htmlExactQ ?? 0, q);
        break;
      case "text/*":
        htmlSubtypeWildcardQ = Math.max(htmlSubtypeWildcardQ ?? 0, q);
        break;
      case "*/*":
        htmlAnyQ = Math.max(htmlAnyQ ?? 0, q);
        break;
      default:
        break;
    }
  }

  // Most specific matching range wins, even when its weight is lower
  // than a wildcard's.
  const htmlQ = htmlExactQ ?? htmlSubtypeWildcardQ ?? htmlAnyQ ?? 0;
  return markdownQ > 0 && markdownQ >= htmlQ;
}
95+
5396
export const config = {
  // Next.js validates matchers at build time, so every entry must be a
  // static string literal. "/posts/:path*" routes post requests through
  // the middleware for the Markdown negotiation rewrite; the admin and
  // API entries are the pre-existing auth-guard scope.
  // NOTE(review): every /posts request now enters the middleware —
  // confirm that non-Markdown requests pass through the auth guard
  // untouched.
  matcher: [
    "/admin/:path*",
    "/api/:path*",
    "/posts/:path*",
  ],
};

0 commit comments

Comments
 (0)