diff --git a/packages/vinext/src/entries/pages-server-entry.ts b/packages/vinext/src/entries/pages-server-entry.ts
index 0992d9af7..ac4dc7db1 100644
--- a/packages/vinext/src/entries/pages-server-entry.ts
+++ b/packages/vinext/src/entries/pages-server-entry.ts
@@ -122,6 +122,10 @@ export async function generateServerEntry(
rewrites: nextConfig?.rewrites ?? { beforeFiles: [], afterFiles: [], fallback: [] },
headers: nextConfig?.headers ?? [],
expireTime: nextConfig?.expireTime,
+ // Serialized regex source — see `.nextjs-ref/packages/next/src/shared/lib/router/utils/html-bots.ts`.
+ // Used by the bot-aware fallback path (#1543) so the override stays
+ // consistent with streaming metadata gating.
+ htmlLimitedBots: nextConfig?.htmlLimitedBots,
i18n: nextConfig?.i18n ?? null,
// Mirrors Next.js `experimental.disableOptimizedLoading` — when false
// (the default), page scripts are emitted with `defer` in
// `<head>`. See
@@ -789,6 +793,12 @@ async function _renderPage(request, url, manifest, middlewareHeaders, options) {
},
routePattern,
routeUrl,
+ // Bot-aware fallback flip (#1543). When a crawler hits an unlisted
+ // \`fallback: true\` path we want a synchronous render with real
+ // props, not the \`Loading...\` shell. Mirrors Next.js's bot check in
+ // \`.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts\`.
+ userAgent: request.headers.get("user-agent") || undefined,
+ htmlLimitedBots: vinextConfig.htmlLimitedBots,
runInFreshUnifiedContext(callback) {
var revalCtx = _createUnifiedCtx({
executionContext: _getRequestExecutionContext(),
diff --git a/packages/vinext/src/index.ts b/packages/vinext/src/index.ts
index 9d585a62d..08af5a096 100644
--- a/packages/vinext/src/index.ts
+++ b/packages/vinext/src/index.ts
@@ -3446,6 +3446,7 @@ export default function vinext(options: VinextOptions = {}): PluginOption[] {
nextConfig?.trailingSlash ?? false,
middlewarePath !== null,
nextConfig?.clientTraceMetadata,
+ nextConfig?.htmlLimitedBots,
);
const mwStatus = req.__vinextMiddlewareStatus;
diff --git a/packages/vinext/src/server/dev-server.ts b/packages/vinext/src/server/dev-server.ts
index 1d779c866..ca643ecf8 100644
--- a/packages/vinext/src/server/dev-server.ts
+++ b/packages/vinext/src/server/dev-server.ts
@@ -50,6 +50,7 @@ import { buildDefaultPagesNotFoundResponse } from "./pages-default-404.js";
import { resolvePagesPageMethodResponse } from "./pages-page-method.js";
import { isSerializableProps } from "./pages-serializable-props.js";
import { loadUserDocumentInitialProps } from "./pages-document-initial-props.js";
+import { isBotUserAgent } from "../utils/html-limited-bots.js";
/**
* Render a React element to a string using renderToReadableStream.
@@ -267,6 +268,14 @@ export function createSSRHandler(
* `next.config`. When undefined or empty, no meta tags are emitted.
*/
clientTraceMetadata?: readonly string[],
+ /**
+ * Serialized `htmlLimitedBots` regexp source from `next.config`. Used by the
+ * bot-aware fallback path (#1543): when a crawler hits an unlisted
+ * `fallback: true` path we render synchronously with real props instead of
+ * the `Loading...` shell. Mirrors the Next.js production check in
+ * `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`.
+ */
+ htmlLimitedBots?: string,
) {
const matcher = fileMatcher ?? createValidFileMatcher();
@@ -526,19 +535,30 @@ export function createSSRHandler(
// Render the loading shell for `fallback: true` when the path
// wasn't pre-rendered. Data requests still resolve real props so
// the client can swap in after the shell ships.
+ //
+ // Crawler/bot deopt (#1543): if the request is from a known
+ // crawler, skip the fallback shell and synchronously render the
+ // full page so the bot indexes real content (not `Loading...`).
+ // Mirrors Next.js's bot check in
+ // `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`.
if (fallback === true && !isValidPath && !isDataReq) {
- isFallbackRender = true;
- if (typeof routerShim.setSSRContext === "function") {
- routerShim.setSSRContext({
- pathname: patternToNextFormat(route.pattern),
- query,
- asPath: url,
- locale: locale ?? currentDefaultLocale,
- locales: i18nConfig?.locales,
- defaultLocale: currentDefaultLocale,
- domainLocales,
- isFallback: true,
- });
+ const userAgentHeader = req.headers["user-agent"];
+ const userAgent = Array.isArray(userAgentHeader) ? userAgentHeader[0] : userAgentHeader;
+ const isBotReq = !!userAgent && isBotUserAgent(userAgent, htmlLimitedBots);
+ if (!isBotReq) {
+ isFallbackRender = true;
+ if (typeof routerShim.setSSRContext === "function") {
+ routerShim.setSSRContext({
+ pathname: patternToNextFormat(route.pattern),
+ query,
+ asPath: url,
+ locale: locale ?? currentDefaultLocale,
+ locales: i18nConfig?.locales,
+ defaultLocale: currentDefaultLocale,
+ domainLocales,
+ isFallback: true,
+ });
+ }
}
}
}
diff --git a/packages/vinext/src/server/pages-page-data.ts b/packages/vinext/src/server/pages-page-data.ts
index e70c8d322..4359b0470 100644
--- a/packages/vinext/src/server/pages-page-data.ts
+++ b/packages/vinext/src/server/pages-page-data.ts
@@ -13,6 +13,7 @@ import {
} from "./pages-page-response.js";
import { buildDefaultPagesNotFoundResponse } from "./pages-default-404.js";
import { isSerializableProps } from "./pages-serializable-props.js";
+import { isBotUserAgent } from "../utils/html-limited-bots.js";
type PagesRedirectResult = {
destination: string;
@@ -161,6 +162,25 @@ export type ResolvePagesPageDataOptions = {
* trigger lands.
*/
isOnDemandRevalidate?: boolean;
+ /**
+ * Request `User-Agent` header. Used to detect crawlers/bots so that an
+ * unlisted `fallback: true` path is rendered synchronously (with real
+ * props) for bots instead of shipping the loading shell. Mirrors Next.js
+ * `pages-handler.ts`:
+ *
+ * if ((isIsrFallback && isBot(req.headers['user-agent'])) || isMinimalMode) {
+ * isIsrFallback = false
+ * }
+ *
+ * See: `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`.
+ */
+ userAgent?: string;
+ /**
+ * Serialized `htmlLimitedBots` regexp source from `next.config`. When set,
+ * overrides the default HTML-limited bot list that `isBotUserAgent` checks for
+ * the fallback flip — keeps the override consistent with streaming metadata gating.
+ */
+ htmlLimitedBots?: string;
pageModule: PagesPageModule;
params: Record;
query: Record;
@@ -395,6 +415,20 @@ export async function resolvePagesPageData(
}
}
+ // Crawler/bot deopt: a bot hitting an unlisted `fallback: true` path
+ // should get a blocking synchronous render (real content) rather than the
+ // loading shell, so the crawler indexes the actual page and not
+ // `Loading...`. Mirrors Next.js's bot check in `pages-handler.ts`:
+ // `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`.
+ // Refs #1543.
+ if (
+ isFallback &&
+ options.userAgent &&
+ isBotUserAgent(options.userAgent, options.htmlLimitedBots)
+ ) {
+ isFallback = false;
+ }
+
let pageProps: Record = {};
let gsspRes: PagesMutableGsspResponse | null = null;
diff --git a/packages/vinext/src/utils/html-limited-bots.ts b/packages/vinext/src/utils/html-limited-bots.ts
index 3d6adcff0..90f57c838 100644
--- a/packages/vinext/src/utils/html-limited-bots.ts
+++ b/packages/vinext/src/utils/html-limited-bots.ts
@@ -2,6 +2,13 @@
// packages/next/src/shared/lib/router/utils/html-bots.ts
const HTML_LIMITED_BOT_UA_RE_STRING = String.raw`[\w-]+-Google|Google-[\w-]+|Chrome-Lighthouse|Slurp|DuckDuckBot|baiduspider|yandex|sogou|bitlybot|tumblr|vkShare|quora link preview|redditbot|ia_archiver|Bingbot|BingPreview|applebot|facebookexternalhit|facebookcatalog|Twitterbot|LinkedInBot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview|Yeti|googleweblight`;
+// Headless browser bot (executes JS). Mirrors Next.js
+// `HEADLESS_BROWSER_BOT_UA_RE` in
+// `.nextjs-ref/packages/next/src/shared/lib/router/utils/is-bot.ts`.
+// Matches "Googlebot" but not hyphen-suffixed variants such as "Googlebot-Image",
+// nor "Mediapartners-Google" / "AdsBot-Google", which the HTML-limited list covers.
+const HEADLESS_BROWSER_BOT_UA_RE = /Googlebot(?!-)|Googlebot$/i;
+
const htmlLimitedBotRegexCache = new Map();
export function getHtmlLimitedBotRegex(htmlLimitedBots: string | undefined): RegExp {
@@ -13,3 +20,25 @@ export function getHtmlLimitedBotRegex(htmlLimitedBots: string | undefined): Reg
htmlLimitedBotRegexCache.set(source, regex);
return regex;
}
+
+/**
+ * Decides whether a User-Agent belongs to a known crawler/bot. An empty
+ * User-Agent never counts; otherwise Next.js's "headless browser bot" pattern
+ * (Googlebot proper) is tried first, then the "HTML-limited bot" list
+ * (Bingbot, DuckDuckBot, facebookexternalhit, …).
+ *
+ * The Pages Router fallback path relies on this: a bot requesting an unlisted
+ * `fallback: true` route gets a synchronous render with real content rather
+ * than the loading shell, so the crawler indexes the actual page. Mirrors
+ * Next.js's `isBot()` in `.nextjs-ref/packages/next/src/shared/lib/router/utils/is-bot.ts`
+ * and the bot-aware fallback flip in
+ * `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`.
+ *
+ * `htmlLimitedBots` is next.config's override for the HTML-limited list (the one
+ * also used for streaming metadata gating); the Googlebot check always applies.
+ */
+export function isBotUserAgent(userAgent: string, htmlLimitedBots?: string): boolean {
+  if (!userAgent) return false;
+  const matchesHeadlessBot = HEADLESS_BROWSER_BOT_UA_RE.test(userAgent);
+  return matchesHeadlessBot || getHtmlLimitedBotRegex(htmlLimitedBots).test(userAgent);
+}
diff --git a/tests/pages-page-data.test.ts b/tests/pages-page-data.test.ts
index e2ae13a3c..3f7ae8935 100644
--- a/tests/pages-page-data.test.ts
+++ b/tests/pages-page-data.test.ts
@@ -120,6 +120,70 @@ describe("pages page data", () => {
await expect(result.response.text()).resolves.toContain("This page could not be found.");
});
+ // Refs #1543: a crawler/bot UA hitting an unlisted `fallback: true` path
+ // must NOT receive the loading shell — it should render synchronously so
+ // the bot indexes real content. Mirrors Next.js's bot check in
+ // `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`.
+ it("does not set isFallback for bot User-Agent on unlisted fallback: true paths", async () => {
+ let gspCalled = false;
+ const result = await resolvePagesPageData(
+ createOptions({
+ pageModule: {
+ async getStaticPaths() {
+ return {
+ fallback: true,
+ paths: [{ params: { slug: "hello-world" } }],
+ };
+ },
+ async getStaticProps({ params }) {
+ gspCalled = true;
+ return { props: { slug: params?.slug ?? null } };
+ },
+ },
+ params: { slug: "unknown" },
+ query: { slug: "unknown" },
+ route: { isDynamic: true },
+ routeUrl: "/posts/unknown",
+ userAgent: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+ }),
+ );
+
+ expect(result.kind).toBe("render");
+ if (result.kind !== "render") throw new Error("expected render result");
+ expect(result.isFallback).toBe(false);
+ expect(gspCalled).toBe(true);
+ expect(result.pageProps).toMatchObject({ slug: "unknown" });
+ });
+
+ it("sets isFallback for normal browser User-Agent on unlisted fallback: true paths", async () => {
+ const result = await resolvePagesPageData(
+ createOptions({
+ pageModule: {
+ async getStaticPaths() {
+ return {
+ fallback: true,
+ paths: [{ params: { slug: "hello-world" } }],
+ };
+ },
+ async getStaticProps() {
+ throw new Error("getStaticProps should not run on a fallback shell render");
+ },
+ },
+ params: { slug: "unknown" },
+ query: { slug: "unknown" },
+ route: { isDynamic: true },
+ routeUrl: "/posts/unknown",
+ userAgent:
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
+ }),
+ );
+
+ expect(result.kind).toBe("render");
+ if (result.kind !== "render") throw new Error("expected render result");
+ expect(result.isFallback).toBe(true);
+ expect(result.pageProps).toEqual({});
+ });
+
it("short-circuits getServerSideProps responses after res.end()", async () => {
const responsePromise = Promise.resolve(
new Response('{"ok":true}', {
diff --git a/tests/pages-router.test.ts b/tests/pages-router.test.ts
index 28978aa1f..a5d4b5c8c 100644
--- a/tests/pages-router.test.ts
+++ b/tests/pages-router.test.ts
@@ -1486,6 +1486,56 @@ describe("Pages Router integration", () => {
expect(json.pageProps).toMatchObject({ pid: "unknown" });
});
+ // Refs #1543: bot/crawler requests must bypass the `fallback: true` loading
+ // shell and synchronously render real content so crawlers index the page,
+ // not `Loading...`. Mirrors Next.js's bot check in
+ // `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`
+ // and the Next.js e2e regression test
+ // `.nextjs-ref/test/e2e/prerender-crawler.test.ts`.
+ it("renders synchronously (not the fallback shell) for crawler UAs on unlisted fallback: true paths", async () => {
+ const userAgents = [
+ "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)",
+ "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
+ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)",
+ ];
+ for (const userAgent of userAgents) {
+ const slug = `bot-slug-${Math.random().toString(36).slice(2)}`;
+ const res = await fetch(`${baseUrl}/products/${slug}`, {
+ headers: { "user-agent": userAgent },
+ });
+ expect(res.status, `UA: ${userAgent}`).toBe(200);
+ const html = await res.text();
+ // Bot should see the real rendered page, not the loading shell.
+ expect(html, `UA: ${userAgent}`).not.toContain("Loading product...");
+ expect(html, `UA: ${userAgent}`).toMatch(new RegExp(`Product ID:.*${slug}`));
+ const match = html.match(/__NEXT_DATA__\s*=\s*(\{.*?\})\s*[;<]/);
+ expect(match, `UA: ${userAgent}`).toBeTruthy();
+ const nextData = JSON.parse(match![1]);
+ expect(nextData.isFallback, `UA: ${userAgent}`).toBe(false);
+ expect(nextData.props.pageProps).toMatchObject({ pid: slug });
+ }
+ });
+
+ it("still ships the fallback shell for normal browser UAs on unlisted fallback: true paths", async () => {
+ // Counterpart of the crawler test — the bot-flip must not catch real
+ // browsers. Plain Chrome UA should still receive the loading shell.
+ const res = await fetch(`${baseUrl}/products/non-bot-slug`, {
+ headers: {
+ "user-agent":
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
+ },
+ });
+ expect(res.status).toBe(200);
+ const html = await res.text();
+ expect(html).toContain("Loading product...");
+ const match = html.match(/__NEXT_DATA__\s*=\s*(\{.*?\})\s*[;<]/);
+ expect(match).toBeTruthy();
+ const nextData = JSON.parse(match![1]);
+ expect(nextData.isFallback).toBe(true);
+ });
+
it("includes isFallback: false in __NEXT_DATA__", async () => {
const res = await fetch(`${baseUrl}/products/widget`);
const html = await res.text();