diff --git a/lib/fetch-remote-asset.test.ts b/lib/fetch-remote-asset.test.ts new file mode 100644 index 00000000..1be5abf4 --- /dev/null +++ b/lib/fetch-remote-asset.test.ts @@ -0,0 +1,117 @@ +/* @vitest-environment node */ +import { describe, expect, it, vi } from "vitest"; +import { + fetchRemoteAsset, + type RemoteAssetError, +} from "@/lib/fetch-remote-asset"; + +// Each test replaces the global fetch/DNS lookup so we can simulate edge cases deterministically. +const fetchMock = vi.hoisted(() => vi.fn()); +const dnsLookupMock = vi.hoisted(() => + vi.fn(async () => [{ address: "93.184.216.34", family: 4 }]), +); + +vi.mock("node:dns/promises", () => ({ + lookup: dnsLookupMock, +})); + +describe("fetchRemoteAsset", () => { + beforeEach(() => { + fetchMock.mockReset(); + dnsLookupMock.mockReset(); + dnsLookupMock.mockResolvedValue([{ address: "93.184.216.34", family: 4 }]); + globalThis.fetch = fetchMock as unknown as typeof fetch; + }); + + it("returns buffer and content type for valid asset", async () => { + const body = new Uint8Array([1, 2, 3]); + fetchMock.mockResolvedValueOnce( + new Response(body, { + status: 200, + headers: { "content-type": "image/png" }, + }), + ); + + const result = await fetchRemoteAsset({ + url: "https://example.com/image.png", + maxBytes: 1024, + }); + + expect(Buffer.isBuffer(result.buffer)).toBe(true); + expect(result.contentType).toBe("image/png"); + expect(result.finalUrl).toBe("https://example.com/image.png"); + }); + + it("rejects http URLs when allowHttp not set", async () => { + await expect( + fetchRemoteAsset({ url: "http://example.com/file.png" }), + ).rejects.toMatchObject({ + code: "protocol_not_allowed", + } satisfies Partial); + }); + + it("allows http URLs when allowHttp is true", async () => { + fetchMock.mockResolvedValueOnce( + new Response(new Uint8Array([1]), { status: 200 }), + ); + const result = await fetchRemoteAsset({ + url: "http://example.com/icon.png", + allowHttp: true, + }); + expect(result.finalUrl).toBe("http://example.com/icon.png"); + }); + + it("blocks hosts that resolve to private IPs", async () => { + dnsLookupMock.mockResolvedValueOnce([{ address: "10.0.0.5", family: 4 }]); + await expect( + fetchRemoteAsset({ url: "https://private.example/icon.png" }), + ).rejects.toMatchObject({ + code: "private_ip", + } satisfies Partial); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("follows redirects up to limit", async () => { + const redirectResponse = new Response(null, { + status: 302, + headers: { location: "https://cdn.example.com/img.png" }, + }); + const finalResponse = new Response(new Uint8Array([1, 2, 3]), { + status: 200, + headers: { "content-type": "image/png" }, + }); + fetchMock + .mockResolvedValueOnce(redirectResponse) + .mockResolvedValueOnce(finalResponse); + + dnsLookupMock + .mockResolvedValueOnce([{ address: "93.184.216.34", family: 4 }]) + .mockResolvedValueOnce([{ address: "93.184.216.35", family: 4 }]); + + const result = await fetchRemoteAsset({ + url: "https://example.com/img.png", + maxRedirects: 2, + }); + expect(result.finalUrl).toBe("https://cdn.example.com/img.png"); + expect(fetchMock).toHaveBeenCalledTimes(2); + }); + + it("throws when asset exceeds configured size", async () => { + const largeBody = new Uint8Array(1024); + fetchMock.mockResolvedValueOnce( + new Response(largeBody, { + status: 200, + headers: { "content-type": "image/png" }, + }), + ); + + await expect( + fetchRemoteAsset({ + url: "https://example.com/large.png", + maxBytes: 10, + }), + ).rejects.toMatchObject({ + code: "size_exceeded", + } satisfies Partial); + }); +}); diff --git a/lib/fetch-remote-asset.ts b/lib/fetch-remote-asset.ts new file mode 100644 index 00000000..66367b82 --- /dev/null +++ b/lib/fetch-remote-asset.ts @@ -0,0 +1,333 @@ +import { lookup as dnsLookup } from "node:dns/promises"; +import { isIP } from "node:net"; +import * as ipaddr from "ipaddr.js"; + +// Hosts that should never be fetched regardless of DNS (fast path). +const BLOCKED_HOSTNAMES = new Set(["localhost"]); +const BLOCKED_SUFFIXES = [".local", ".internal", ".localhost"]; + +// Sensible defaults; callers can override per-use. +const DEFAULT_MAX_BYTES = 5 * 1024 * 1024; // 5MB +const DEFAULT_TIMEOUT_MS = 8000; +const DEFAULT_MAX_REDIRECTS = 3; + +export type RemoteAssetErrorCode = + | "invalid_url" + | "protocol_not_allowed" + | "host_not_allowed" + | "host_blocked" + | "dns_error" + | "private_ip" + | "redirect_limit" + | "response_error" + | "size_exceeded"; + +export class RemoteAssetError extends Error { + constructor( + public readonly code: RemoteAssetErrorCode, + message: string, + public readonly status?: number, + ) { + super(message); + this.name = "RemoteAssetError"; + } +} + +export type FetchRemoteAssetOptions = { + /** Absolute URL, or relative to `currentUrl` when provided. */ + url: string | URL; + /** Optional base URL used to resolve relative `url` values. */ + currentUrl?: string | URL; + /** Additional headers (e.g., `User-Agent`). */ + headers?: HeadersInit; + /** Abort timeout per request/redirect hop (ms). */ + timeoutMs?: number; + /** Maximum bytes to buffer before aborting. */ + maxBytes?: number; + /** Maximum redirects we will follow while re-checking the host. */ + maxRedirects?: number; + /** Additional allow list to further restrict hosts (still subject to default blocklist). */ + allowedHosts?: string[]; + /** Allow HTTP (useful for favicons); defaults to HTTPS only. */ + allowHttp?: boolean; +}; + +export type RemoteAssetResult = { + buffer: Buffer; + contentType: string | null; + finalUrl: string; + status: number; +}; + +/** + * Fetch a user-controlled asset while protecting against SSRF, redirect-based + * host swapping, and unbounded memory usage. + */ +export async function fetchRemoteAsset( + opts: FetchRemoteAssetOptions, +): Promise { + let currentUrl = toUrl(opts.url, opts.currentUrl); + const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const maxBytes = opts.maxBytes ?? DEFAULT_MAX_BYTES; + const maxRedirects = opts.maxRedirects ?? DEFAULT_MAX_REDIRECTS; + const allowHttp = opts.allowHttp ?? false; + const allowedHosts = + opts.allowedHosts + ?.map((host) => host.trim().toLowerCase()) + .filter(Boolean) ?? []; + + for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) { + // Treat every hop (including the initial request) as untrusted and re-run + // the hostname/IP vetting so redirects cannot smuggle us into a private net. + await ensureUrlAllowed(currentUrl, { allowHttp, allowedHosts }); + + const response = await timedFetch(currentUrl.toString(), { + headers: opts.headers, + timeoutMs, + }); + + if (isRedirect(response)) { + if (redirectCount === maxRedirects) { + throw new RemoteAssetError( + "redirect_limit", + `Too many redirects fetching ${currentUrl.toString()}`, + ); + } + // Follow the Location manually so we can validate the next host ourselves. + const location = response.headers.get("location"); + if (!location) { + throw new RemoteAssetError( + "response_error", + "Redirect response missing Location header", + ); + } + const nextUrl = new URL(location, currentUrl); + currentUrl = nextUrl; + continue; + } + + if (!response.ok) { + const error = new RemoteAssetError( + "response_error", + `Remote asset request failed with ${response.status}`, + response.status, + ); + console.warn("[remote-asset] response error", { + url: currentUrl.toString(), + reason: error.message, + }); + throw error; + } + + const declaredLength = response.headers.get("content-length"); + if (declaredLength) { + const declared = Number(declaredLength); + if (Number.isFinite(declared) && declared > maxBytes) { + const error = new RemoteAssetError( + "size_exceeded", + `Remote asset declared size ${declared} exceeds limit ${maxBytes}`, + ); + console.warn("[remote-asset] size exceeded", { + url: currentUrl.toString(), + reason: error.message, + }); + throw error; + } + } + + const buffer = await readBodyWithLimit(response, maxBytes); + const contentType = response.headers.get("content-type"); + return { + buffer, + contentType, + finalUrl: currentUrl.toString(), + status: response.status, + }; + } + + throw new RemoteAssetError("redirect_limit", "Exceeded redirect limit"); +} + +function toUrl(input: string | URL, base?: string | URL): URL { + if (input instanceof URL) return input; + try { + return base ? new URL(input, base) : new URL(input); + } catch { + throw new RemoteAssetError("invalid_url", `Invalid URL: ${input}`); + } +} + +/** + * Validate scheme + hostname, ensure DNS does not resolve to private ranges, + * and respect the optional allow list. + */ +async function ensureUrlAllowed( + url: URL, + options: { allowHttp: boolean; allowedHosts: string[] }, +) { + const protocol = url.protocol.toLowerCase(); + // HTTPS is the default; only allow HTTP when explicitly opted-in. + if (protocol !== "https:" && !(options.allowHttp && protocol === "http:")) { + throw new RemoteAssetError( + "protocol_not_allowed", + `Protocol ${protocol} not allowed`, + ); + } + + const hostname = url.hostname.trim().toLowerCase(); + if (!hostname) { + throw new RemoteAssetError("invalid_url", "URL missing hostname"); + } + + if ( + BLOCKED_HOSTNAMES.has(hostname) || + BLOCKED_SUFFIXES.some((suffix) => hostname.endsWith(suffix)) + ) { + console.warn("[remote-asset] blocked host", { + url: url.toString(), + reason: "host_blocked", + }); + throw new RemoteAssetError("host_blocked", `Host ${hostname} is blocked`); + } + + if ( + options.allowedHosts.length > 0 && + !options.allowedHosts.includes(hostname) + ) { + console.warn("[remote-asset] blocked host", { + url: url.toString(), + reason: "host_not_allowed", + }); + throw new RemoteAssetError( + "host_not_allowed", + `Host ${hostname} is not in allow list`, + ); + } + + if (isIP(hostname)) { + if (isBlockedIp(hostname)) { + console.warn("[remote-asset] blocked ip", { + url: url.toString(), + reason: "private_ip", + }); + throw new RemoteAssetError( + "private_ip", + `IP ${hostname} is not reachable`, + ); + } + return; + } + + let records: Array<{ address: string; family: number }>; + try { + records = await dnsLookup(hostname, { all: true }); + } catch (err) { + console.warn("[remote-asset] dns error", { + url: url.toString(), + reason: err instanceof Error ? err.message : "dns_error", + }); + throw new RemoteAssetError( + "dns_error", + err instanceof Error ? err.message : "DNS lookup failed", + ); + } + + if (!records || records.length === 0) { + console.warn("[remote-asset] dns error", { + url: url.toString(), + reason: "no_records", + }); + throw new RemoteAssetError("dns_error", "DNS lookup returned no records"); + } + + if (records.some((record) => isBlockedIp(record.address))) { + console.warn("[remote-asset] blocked ip", { + url: url.toString(), + reason: "private_ip", + }); + throw new RemoteAssetError( + "private_ip", + `DNS for ${hostname} resolved to private address`, + ); + } +} + +/** + * Wrapper around `fetch` that adds an AbortController/timeout per request. + */ +async function timedFetch( + url: string, + opts: { headers?: HeadersInit; timeoutMs: number }, +): Promise { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), opts.timeoutMs); + try { + return await fetch(url, { + method: "GET", + headers: opts.headers, + redirect: "manual", + signal: controller.signal, + }); + } finally { + clearTimeout(timer); + } +} + +function isRedirect(response: Response): boolean { + return response.status >= 300 && response.status < 400; +} + +/** + * Incrementally read the response body, aborting if it exceeds the byte limit. + */ +async function readBodyWithLimit( + response: Response, + maxBytes: number, +): Promise { + if (!response.body) { + // No stream available (tiny body or mocked response); a simple check suffices. + const buf = Buffer.from(await response.arrayBuffer()); + if (buf.byteLength > maxBytes) { + throw new RemoteAssetError( + "size_exceeded", + `Remote asset exceeded ${maxBytes} bytes`, + ); + } + return buf; + } + + const reader = response.body.getReader(); + const chunks: Buffer[] = []; + let received = 0; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + if (value) { + received += value.byteLength; + if (received > maxBytes) { + try { + reader.cancel(); + } catch { + // ignore + } + // Abort as soon as the limit is crossed to avoid buffering unbounded data. + throw new RemoteAssetError( + "size_exceeded", + `Remote asset exceeded ${maxBytes} bytes`, + ); + } + chunks.push(Buffer.from(value)); + } + } + return Buffer.concat(chunks, received); +} + +function isBlockedIp(address: string): boolean { + try { + const parsed = ipaddr.parse(address); + const range = parsed.range(); + return range !== "unicast"; + } catch { + return true; + } +} diff --git a/lib/fetch.test.ts b/lib/fetch.test.ts index f9626661..60da8a88 100644 --- a/lib/fetch.test.ts +++ b/lib/fetch.test.ts @@ -2,8 +2,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { fetchWithSelectiveRedirects, - fetchWithTimeout, - headThenGet, + fetchWithTimeoutAndRetry, } from "@/lib/fetch"; const originalFetch = globalThis.fetch; @@ -48,7 +47,7 @@ describe("lib/fetch", () => { async () => res as Response, ) as unknown as typeof fetch; - const out = await fetchWithTimeout( + const out = await fetchWithTimeoutAndRetry( "https://example.com", {}, { timeoutMs: 50 }, @@ -78,7 +77,7 @@ describe("lib/fetch", () => { }) as unknown as typeof fetch; await expect( - fetchWithTimeout("https://slow.test", {}, { timeoutMs: 10 }), + fetchWithTimeoutAndRetry("https://slow.test", {}, { timeoutMs: 10 }), ).rejects.toThrow("aborted"); }); }); @@ -91,7 +90,7 @@ describe("lib/fetch", () => { .mockResolvedValueOnce(res); globalThis.fetch = mock as unknown as typeof fetch; - const p = fetchWithTimeout( + const p = fetchWithTimeoutAndRetry( "https://flaky.test", {}, { @@ -107,43 +106,6 @@ describe("lib/fetch", () => { expect(mock).toHaveBeenCalledTimes(2); }, 5000); // 5s timeout for retry logic - it("headThenGet uses HEAD when ok", async () => { - const res = createResponse({ ok: true, status: 200 }); - const mock = vi.fn(async (_input, init) => { - if ((init as RequestInit | undefined)?.method === "HEAD") return res; - return createResponse({ ok: false, status: 500 }); - }); - globalThis.fetch = mock as unknown as typeof fetch; - - const { response, usedMethod } = await headThenGet( - "https://example.com", - {}, - { timeoutMs: 50 }, - ); - expect(response).toBe(res); - expect(usedMethod).toBe("HEAD"); - expect(mock).toHaveBeenCalledTimes(1); - }); - - it("headThenGet falls back to GET when HEAD fails", async () => { - const getRes = createResponse({ ok: true, status: 200 }); - const mock = vi.fn(async (_input, init) => { - if ((init as RequestInit | undefined)?.method === "HEAD") - return createResponse({ ok: false, status: 405 }); - return getRes; - }); - globalThis.fetch = mock as unknown as typeof fetch; - - const { response, usedMethod } = await headThenGet( - "https://example.com", - {}, - { timeoutMs: 50 }, - ); - expect(response).toBe(getRes); - expect(usedMethod).toBe("GET"); - expect(mock).toHaveBeenCalledTimes(2); - }); - describe("fetchWithSelectiveRedirects", () => { it("returns non-redirect responses immediately", async () => { const res = createResponse({ ok: true, status: 200 }); diff --git a/lib/fetch.ts b/lib/fetch.ts index 3a5d1d82..b6e44e2a 100644 --- a/lib/fetch.ts +++ b/lib/fetch.ts @@ -1,4 +1,8 @@ -export async function fetchWithTimeout( +/** + * Fetch a trusted upstream resource with a timeout and optional retries. + * Do not use this for user-controlled URLs; prefer the hardened remote asset helper. + */ +export async function fetchWithTimeoutAndRetry( input: RequestInfo | URL, init: RequestInit = {}, opts: { timeoutMs?: number; retries?: number; backoffMs?: number } = {}, @@ -6,19 +10,23 @@ export async function fetchWithTimeout( const timeoutMs = opts.timeoutMs ?? 5000; const retries = Math.max(0, opts.retries ?? 0); const backoffMs = Math.max(0, opts.backoffMs ?? 150); + const externalSignal = init.signal ?? undefined; let lastError: unknown = null; for (let attempt = 0; attempt <= retries; attempt++) { - const controller = new AbortController(); - const timer = setTimeout(() => controller.abort(), timeoutMs); + const { signal, cleanup } = createAbortSignal(timeoutMs, externalSignal); try { - const res = await fetch(input, { ...init, signal: controller.signal }); - clearTimeout(timer); + const res = await fetch(input, { ...init, signal }); + cleanup(); return res; } catch (err) { lastError = err; - clearTimeout(timer); + cleanup(); + if (externalSignal?.aborted) { + throw err instanceof Error ? err : new Error("fetch aborted"); + } if (attempt < retries) { + // Simple linear backoff — good enough for trusted upstream retry logic. await new Promise((r) => setTimeout(r, backoffMs)); } } @@ -26,33 +34,6 @@ export async function fetchWithTimeout( throw lastError instanceof Error ? lastError : new Error("fetch failed"); } -export async function headThenGet( - url: string, - init: RequestInit = {}, - opts: { timeoutMs?: number } = {}, -): Promise<{ response: Response; usedMethod: "HEAD" | "GET" }> { - const timeoutMs = opts.timeoutMs ?? 5000; - try { - const headRes = await fetchWithTimeout( - url, - { ...init, method: "HEAD", redirect: "follow" }, - { timeoutMs }, - ); - if (headRes.ok) { - return { response: headRes, usedMethod: "HEAD" }; - } - } catch { - // fall through to GET - } - - const getRes = await fetchWithTimeout( - url, - { ...init, method: "GET", redirect: "follow" }, - { timeoutMs }, - ); - return { response: getRes, usedMethod: "GET" }; -} - /** * Determines if a redirect should be followed based on selective redirect rules. * Allows redirects: @@ -63,7 +44,7 @@ export async function headThenGet( * 5. With different hash fragments (e.g., example.com -> example.com/#content) * * Blocks redirects: - * - To different domains (after normalizing www) + * - To different hostnames (after normalizing www) */ function isAllowedRedirect(fromUrl: string, toUrl: string): boolean { try { @@ -75,7 +56,7 @@ function isAllowedRedirect(fromUrl: string, toUrl: string): boolean { const fromHost = normalizeHost(from.hostname); const toHost = normalizeHost(to.hostname); - // Must be the same registrable domain (after removing www) + // Must be the same hostname (after removing www). Subdomains remain blocked. if (fromHost !== toHost) { return false; } @@ -154,3 +135,80 @@ export async function fetchWithSelectiveRedirects( `Too many redirects (${maxRedirects}) when fetching ${currentUrl}`, ); } + +function createAbortSignal( + timeoutMs: number, + externalSignal?: AbortSignal, +): { signal: AbortSignal; cleanup: () => void } { + const timeoutController = new AbortController(); + const timeoutId = setTimeout(() => timeoutController.abort(), timeoutMs); + const cleanupFns: Array<() => void> = [() => clearTimeout(timeoutId)]; + + const runCleanup = () => { + for (const fn of cleanupFns) { + fn(); + } + }; + + if (!externalSignal) { + return { + signal: timeoutController.signal, + cleanup: runCleanup, + }; + } + + const abortSignalAny = ( + AbortSignal as typeof AbortSignal & { + any?: (signals: AbortSignal[]) => AbortSignal; + } + ).any; + + if (typeof abortSignalAny === "function") { + return { + signal: abortSignalAny([externalSignal, timeoutController.signal]), + cleanup: runCleanup, + }; + } + + const combinedController = new AbortController(); + const abortCombined = (reason?: unknown) => { + if (!combinedController.signal.aborted) { + try { + // biome-ignore lint/suspicious/noExplicitAny: not critical + combinedController.abort(reason as any); + } catch { + combinedController.abort(); + } + } + }; + + const onExternalAbort = () => + abortCombined((externalSignal as { reason?: unknown }).reason); + const onTimeoutAbort = () => + abortCombined((timeoutController.signal as { reason?: unknown }).reason); + + if (externalSignal.aborted) { + onExternalAbort(); + } else { + externalSignal.addEventListener("abort", onExternalAbort, { once: true }); + cleanupFns.push(() => + externalSignal.removeEventListener("abort", onExternalAbort), + ); + } + + if (timeoutController.signal.aborted) { + onTimeoutAbort(); + } else { + timeoutController.signal.addEventListener("abort", onTimeoutAbort, { + once: true, + }); + cleanupFns.push(() => + timeoutController.signal.removeEventListener("abort", onTimeoutAbort), + ); + } + + return { + signal: combinedController.signal, + cleanup: runCleanup, + }; +} diff --git a/server/services/dns.ts b/server/services/dns.ts index 18d38892..29d6cab8 100644 --- a/server/services/dns.ts +++ b/server/services/dns.ts @@ -7,7 +7,7 @@ import { replaceDns } from "@/lib/db/repos/dns"; import { findDomainByName } from "@/lib/db/repos/domains"; import { dnsRecords } from "@/lib/db/schema"; import { toRegistrableDomain } from "@/lib/domain-server"; -import { fetchWithTimeout } from "@/lib/fetch"; +import { fetchWithTimeoutAndRetry } from "@/lib/fetch"; import { ns, redis } from "@/lib/redis"; import { scheduleRevalidation } from "@/lib/schedule"; import { @@ -606,7 +606,8 @@ async function resolveTypeWithProvider( provider: DohProvider, ): Promise { const url = buildDohUrl(provider, domain, type); - const res = await fetchWithTimeout( + // Each DoH call is potentially flaky; short timeout + single retry keeps latency bounded. + const res = await fetchWithTimeoutAndRetry( url, { headers: { ...DEFAULT_HEADERS, ...provider.headers }, diff --git a/server/services/favicon.test.ts b/server/services/favicon.test.ts index 637bc5b5..947fa16a 100644 --- a/server/services/favicon.test.ts +++ b/server/services/favicon.test.ts @@ -8,6 +8,7 @@ import { it, vi, } from "vitest"; +import { RemoteAssetError } from "@/lib/fetch-remote-asset"; // Mock toRegistrableDomain to allow .invalid and .example domains for testing vi.mock("@/lib/domain-server", async () => { @@ -35,7 +36,26 @@ const storageMock = vi.hoisted(() => ({ getFaviconTtlSeconds: vi.fn(() => 60), })); +const fetchRemoteAssetMock = vi.hoisted(() => + // Shared stub so we can flip between success + failure scenarios quickly. + vi.fn(async () => ({ + buffer: Buffer.from([1, 2, 3, 4]), + contentType: "image/png", + finalUrl: "https://example.com/favicon.ico", + status: 200, + })), +); + vi.mock("@/lib/storage", () => storageMock); +vi.mock("@/lib/fetch-remote-asset", async () => { + const actual = await vi.importActual< + typeof import("@/lib/fetch-remote-asset") + >("@/lib/fetch-remote-asset"); + return { + ...actual, + fetchRemoteAsset: fetchRemoteAssetMock, + }; +}); vi.stubEnv("BLOB_READ_WRITE_TOKEN", "test-token"); // Mock sharp to return a pipeline that resolves a buffer (now using webp) @@ -67,6 +87,7 @@ beforeAll(async () => { afterEach(async () => { vi.restoreAllMocks(); storageMock.storeImage.mockReset(); + fetchRemoteAssetMock.mockReset(); const { resetPGliteDb } = await import("@/lib/db/pglite"); await resetPGliteDb(); const { resetInMemoryRedis } = await import("@/lib/redis-mock"); @@ -104,44 +125,42 @@ describe("getOrCreateFaviconBlobUrl", () => { }); it("fetches, converts, stores, and returns url when not cached", async () => { - const body = new Uint8Array([137, 80, 78, 71]); // pretend PNG signature bytes - const resp = new Response(body, { - status: 200, - headers: { "content-type": "image/png" }, - }); - const fetchSpy = vi.spyOn(global, "fetch").mockResolvedValue(resp); - const out = await getOrCreateFaviconBlobUrl("example.com"); expect(out.url).toMatch( /^https:\/\/.*\.blob\.vercel-storage\.com\/[a-f0-9]{32}\/32x32\.webp$/, ); + expect(fetchRemoteAssetMock).toHaveBeenCalled(); expect(storageMock.storeImage).toHaveBeenCalled(); - fetchSpy.mockRestore(); }, 10000); // 10s timeout for network + image processing it("returns null when all sources fail", async () => { - const notOk = new Response(null, { status: 404 }); - const fetchSpy = vi.spyOn(global, "fetch").mockResolvedValue(notOk); + fetchRemoteAssetMock.mockRejectedValue( + new RemoteAssetError("response_error", "Not found", 404), + ); const out = await getOrCreateFaviconBlobUrl("nope.invalid"); expect(out.url).toBeNull(); - fetchSpy.mockRestore(); + expect(fetchRemoteAssetMock).toHaveBeenCalled(); }, 10000); // 10s timeout for multiple fetch attempts it("negative-caches failures to avoid repeat fetch", async () => { - const notOk = new Response(null, { status: 404 }); - const fetchSpy = vi.spyOn(global, "fetch").mockResolvedValue(notOk); + const mkError = () => + new RemoteAssetError("response_error", "Not found", 404); + // First invocation will try up to four sources; force each one to fail exactly once. + fetchRemoteAssetMock + .mockRejectedValueOnce(mkError()) + .mockRejectedValueOnce(mkError()) + .mockRejectedValueOnce(mkError()) + .mockRejectedValueOnce(mkError()); // First call: miss -> fetch attempts -> negative cache const first = await getOrCreateFaviconBlobUrl("negcache.example"); expect(first.url).toBeNull(); - expect(fetchSpy).toHaveBeenCalled(); + expect(fetchRemoteAssetMock).toHaveBeenCalled(); // Second call: should hit negative cache and not fetch again - fetchSpy.mockClear(); + fetchRemoteAssetMock.mockClear(); const second = await getOrCreateFaviconBlobUrl("negcache.example"); expect(second.url).toBeNull(); - expect(fetchSpy).not.toHaveBeenCalled(); - - fetchSpy.mockRestore(); + expect(fetchRemoteAssetMock).not.toHaveBeenCalled(); }, 10000); // 10s timeout for multiple fetch attempts }); diff --git a/server/services/favicon.ts b/server/services/favicon.ts index 2744095c..cb313d4a 100644 --- a/server/services/favicon.ts +++ b/server/services/favicon.ts @@ -3,13 +3,14 @@ import { USER_AGENT } from "@/lib/constants/app"; import { ensureDomainRecord } from "@/lib/db/repos/domains"; import { getFaviconByDomain, upsertFavicon } from "@/lib/db/repos/favicons"; import { toRegistrableDomain } from "@/lib/domain-server"; -import { fetchWithTimeout } from "@/lib/fetch"; +import { fetchRemoteAsset, RemoteAssetError } from "@/lib/fetch-remote-asset"; import { convertBufferToImageCover } from "@/lib/image"; import { storeImage } from "@/lib/storage"; import { ttlForFavicon } from "@/lib/ttl"; const DEFAULT_SIZE = 32; const REQUEST_TIMEOUT_MS = 1500; // per each method +const MAX_FAVICON_BYTES = 1 * 1024 * 1024; // 1MB function buildSources(domain: string): string[] { const enc = encodeURIComponent(domain); @@ -56,32 +57,25 @@ async function fetchFaviconInternal( for (const src of sources) { try { - const res = await fetchWithTimeout( - src, - { - redirect: "follow", - headers: { - Accept: "image/avif,image/webp,image/png,image/*;q=0.9,*/*;q=0.8", - "User-Agent": USER_AGENT, - }, + const asset = await fetchRemoteAsset({ + url: src, + headers: { + Accept: "image/avif,image/webp,image/png,image/*;q=0.9,*/*;q=0.8", + "User-Agent": USER_AGENT, }, - { timeoutMs: REQUEST_TIMEOUT_MS }, - ); - if (!res.ok) { - // Track if this was a 404 (not found) vs other error - if (res.status !== 404) { - allNotFound = false; // Server error, timeout, etc. - not a true "not found" - } - continue; - } - const contentType = res.headers.get("content-type"); - const ab = await res.arrayBuffer(); - const buf = Buffer.from(ab); + maxBytes: MAX_FAVICON_BYTES, + timeoutMs: REQUEST_TIMEOUT_MS, + maxRedirects: 2, + allowHttp: src.startsWith("http://"), + }); + allNotFound = false; + const buf = asset.buffer; + // Normalize everything to a consistent WebP size so we don't leak arbitrary formats downstream. const webp = await convertBufferToImageCover( buf, DEFAULT_SIZE, DEFAULT_SIZE, - contentType, + asset.contentType, ); if (!webp) continue; const { url, pathname } = await storeImage({ @@ -112,8 +106,8 @@ async function fetchFaviconInternal( size: DEFAULT_SIZE, source, notFound: false, - upstreamStatus: res.status, - upstreamContentType: contentType ?? null, + upstreamStatus: asset.status, + upstreamContentType: asset.contentType ?? null, fetchedAt: now, expiresAt, }); @@ -125,9 +119,17 @@ async function fetchFaviconInternal( } return { url }; - } catch { + } catch (err) { + if ( + err instanceof RemoteAssetError && + err.code === "response_error" && + err.status === 404 + ) { + // still considered a true "not found" + } else { + allNotFound = false; + } // Network error, timeout, etc. - not a true "not found" - allNotFound = false; // try next source } } diff --git a/server/services/seo.test.ts b/server/services/seo.test.ts index f04a871a..6223ac55 100644 --- a/server/services/seo.test.ts +++ b/server/services/seo.test.ts @@ -22,10 +22,30 @@ const dnsLookupMock = vi.hoisted(() => vi.fn(async () => [{ address: "203.0.113.10", family: 4 }]), ); +const fetchRemoteAssetMock = vi.hoisted(() => + // We don't care about the actual image buffer in most tests, only that the helper is invoked. + vi.fn(async () => ({ + buffer: Buffer.from([1, 2, 3]), + contentType: "image/png", + finalUrl: "https://example.com/og.png", + status: 200, + })), +); + vi.mock("node:dns/promises", () => ({ lookup: dnsLookupMock, })); +vi.mock("@/lib/fetch-remote-asset", async () => { + const actual = await vi.importActual< + typeof import("@/lib/fetch-remote-asset") + >("@/lib/fetch-remote-asset"); + return { + ...actual, + fetchRemoteAsset: fetchRemoteAssetMock, + }; +}); + import { afterAll, afterEach, @@ -79,6 +99,7 @@ beforeEach(async () => { afterEach(async () => { vi.clearAllMocks(); + fetchRemoteAssetMock.mockReset(); const { resetInMemoryRedis } = await import("@/lib/redis-mock"); resetInMemoryRedis(); }); @@ -192,20 +213,13 @@ describe("getSeo", () => { ) .mockResolvedValueOnce( textResponse("User-agent: *\nAllow: /", "text/plain"), - ) - .mockResolvedValueOnce({ - ok: false, - status: 404, - headers: new Headers({ "content-type": "text/plain" }), - arrayBuffer: async () => new ArrayBuffer(0), - url: "", - } as unknown as Response); + ); + fetchRemoteAssetMock.mockRejectedValueOnce(new Error("upload failed")); const out = await getSeo("img-fail.invalid"); - // original image remains for Meta Tags display expect(out.preview?.image ?? "").toContain("/og.png"); - // uploaded url is null on failure for privacy-safe rendering expect(out.preview?.imageUploaded ?? null).toBeNull(); + expect(fetchRemoteAssetMock).toHaveBeenCalledTimes(1); fetchMock.mockRestore(); }); @@ -248,20 +262,17 @@ describe("getSeo", () => { ) .mockResolvedValueOnce( textResponse("User-agent: *\nAllow: /", "text/plain"), - ) - .mockResolvedValueOnce({ - ok: true, - status: 200, - headers: new Headers({ "content-type": "image/png" }), - arrayBuffer: async () => new ArrayBuffer(100), - url: "", - } as unknown as Response); + ); const out = await getSeo("relative-url.invalid"); - // Relative URL should be resolved to absolute expect(out.preview?.image).toBe("https://example.com/images/og.png"); - // Should have attempted to upload the resolved URL - expect(fetchMock).toHaveBeenCalledTimes(3); // HTML + robots.txt + image + expect(fetchRemoteAssetMock).toHaveBeenCalledTimes(1); + expect(fetchRemoteAssetMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://example.com/images/og.png", + }), + ); + expect(fetchMock).toHaveBeenCalledTimes(2); fetchMock.mockRestore(); }); @@ -280,10 +291,11 @@ describe("getSeo", () => { .mockResolvedValueOnce( textResponse("User-agent: *\nAllow: /", "text/plain"), ); + fetchRemoteAssetMock.mockRejectedValueOnce(new Error("blocked")); const out = await getSeo("loopback.invalid"); expect(out.preview?.imageUploaded).toBeNull(); - // Only HTML + robots fetch should occur (image fetch blocked) + expect(fetchRemoteAssetMock).toHaveBeenCalledTimes(1); expect(fetchMock).toHaveBeenCalledTimes(2); fetchMock.mockRestore(); }); @@ -305,9 +317,11 @@ describe("getSeo", () => { .mockResolvedValueOnce( textResponse("User-agent: *\nAllow: /", "text/plain"), ); + fetchRemoteAssetMock.mockRejectedValueOnce(new Error("blocked")); const out = await getSeo("private-resolve.invalid"); expect(out.preview?.imageUploaded).toBeNull(); + expect(fetchRemoteAssetMock).toHaveBeenCalledTimes(1); expect(fetchMock).toHaveBeenCalledTimes(2); fetchMock.mockRestore(); }); diff --git a/server/services/seo.ts b/server/services/seo.ts index 1d507415..aaa4e2e9 100644 --- a/server/services/seo.ts +++ b/server/services/seo.ts @@ -1,6 +1,4 @@ -import { lookup as dnsLookup } from "node:dns/promises"; import { eq } from "drizzle-orm"; -import * as ipaddr from "ipaddr.js"; import { after } from "next/server"; import { USER_AGENT } from "@/lib/constants/app"; import { db } from "@/lib/db/client"; @@ -8,7 +6,11 @@ import { findDomainByName } from "@/lib/db/repos/domains"; import { upsertSeo } from "@/lib/db/repos/seo"; import { seo as seoTable } from "@/lib/db/schema"; import { toRegistrableDomain } from "@/lib/domain-server"; -import { fetchWithSelectiveRedirects, fetchWithTimeout } from "@/lib/fetch"; +import { + fetchWithSelectiveRedirects, + fetchWithTimeoutAndRetry, +} from "@/lib/fetch"; +import { fetchRemoteAsset } from "@/lib/fetch-remote-asset"; import { optimizeImageCover } from "@/lib/image"; import { scheduleRevalidation } from "@/lib/schedule"; import type { @@ -25,8 +27,6 @@ import { ttlForSeo } from "@/lib/ttl"; const SOCIAL_WIDTH = 1200; const SOCIAL_HEIGHT = 630; const MAX_REMOTE_IMAGE_BYTES = 5 * 1024 * 1024; // 5MB -const BLOCKED_HOSTNAMES = new Set(["localhost"]); -const BLOCKED_HOST_SUFFIXES = [".internal", ".local", ".localhost"]; export async function getSeo(domain: string): Promise { console.debug(`[seo] start ${domain}`); @@ -129,7 +129,7 @@ export async function getSeo(domain: string): Promise { // HTML fetch try { - const res = await fetchWithTimeout( + const res = await fetchWithTimeoutAndRetry( finalUrl, { method: "GET", @@ -191,25 +191,38 @@ export async function getSeo(domain: string): Promise { let uploadedImageUrl: string | null = null; if (preview?.image) { - let pageHost: string | null = null; try { - pageHost = new URL(finalUrl).hostname.toLowerCase(); - } catch { - pageHost = null; - } - - try { - uploadedImageUrl = await generateSocialPreviewBlob( - registrable, - preview.image, - { - allowHosts: pageHost ? [pageHost] : [], + // Always proxy OG images through Blob storage so we control caching/privacy. + const asset = await fetchRemoteAsset({ + url: preview.image, + currentUrl: finalUrl, + headers: { + Accept: + "image/avif,image/webp,image/png,image/jpeg,image/*;q=0.9,*/*;q=0.8", + "User-Agent": USER_AGENT, }, + maxBytes: MAX_REMOTE_IMAGE_BYTES, + timeoutMs: 8000, + maxRedirects: 3, + }); + const optimized = await optimizeImageCover( + asset.buffer, + SOCIAL_WIDTH, + SOCIAL_HEIGHT, ); - // Preserve original image URL for meta display; attach uploaded URL for rendering - preview.imageUploaded = uploadedImageUrl; + if (!optimized || optimized.length === 0) { + throw new Error("Failed to optimize image"); + } + const { url } = await storeImage({ + kind: "opengraph", + domain: registrable, + buffer: optimized, + width: SOCIAL_WIDTH, + height: SOCIAL_HEIGHT, + }); + uploadedImageUrl = url; + preview.imageUploaded = url; } catch { - // On failure, avoid rendering external image URL preview.imageUploaded = null; } } @@ -274,132 +287,3 @@ export async function getSeo(domain: string): Promise { return response; } - -/** - * Generate and upload social preview blob from external image URL. - * Returns the blob URL or null on failure. - */ -async function generateSocialPreviewBlob( - domain: string, - imageUrl: string, - opts?: { allowHosts?: string[] }, -): Promise { - // Guard against non-http(s) schemes to avoid SSRF or unsupported fetches - let parsed: URL; - try { - parsed = new URL(imageUrl); - } catch { - // Invalid URL - return null; - } - - if (!(await allowRemoteImageUrl(parsed, opts))) { - return null; - } - - const lower = domain.toLowerCase(); - - try { - const res = await fetchWithTimeout( - parsed.toString(), - { - method: "GET", - headers: { - Accept: - "image/avif,image/webp,image/png,image/jpeg,image/*;q=0.9,*/*;q=0.8", - "User-Agent": USER_AGENT, - }, - }, - { timeoutMs: 8000 }, - ); - - if (!res.ok) return null; - - const contentType = res.headers.get("content-type") ?? ""; - if (!/image\//.test(contentType)) return null; - - const contentLengthHeader = res.headers.get("content-length"); - if (contentLengthHeader) { - const declared = Number(contentLengthHeader); - if (Number.isFinite(declared) && declared > MAX_REMOTE_IMAGE_BYTES) { - return null; - } - } - - const ab = await res.arrayBuffer(); - if (ab.byteLength > MAX_REMOTE_IMAGE_BYTES) { - return null; - } - - const raw = Buffer.from(ab); - - const image = await optimizeImageCover(raw, SOCIAL_WIDTH, SOCIAL_HEIGHT); - if (!image || image.length === 0) return null; - - const { url } = await storeImage({ - kind: "opengraph", - domain: lower, - buffer: image, - width: SOCIAL_WIDTH, - height: SOCIAL_HEIGHT, - }); - - return url; - } catch { - return null; - } -} - -async function allowRemoteImageUrl( - url: URL, - opts?: { allowHosts?: string[] }, -): Promise { - if (url.protocol !== "https:") { - return false; - } - - if (url.username || url.password) { - return false; - } - - const hostname = url.hostname.trim().toLowerCase(); - const allowHosts = - opts?.allowHosts?.map((h) => h.trim().toLowerCase()).filter(Boolean) ?? []; - - if ( - hostname === "" || - BLOCKED_HOSTNAMES.has(hostname) || - BLOCKED_HOST_SUFFIXES.some((suffix) => hostname.endsWith(suffix)) - ) { - return false; - } - - if (allowHosts.includes(hostname)) { - return true; - } - - if (ipaddr.isValid(hostname)) { - return !isBlockedIp(hostname); - } - - try { - const records = await dnsLookup(hostname, { all: true }); - if (!records || records.length === 0) { - return false; - } - return !records.some((record) => isBlockedIp(record.address)); - } catch { - return false; - } -} - -function isBlockedIp(address: string): boolean { - try { - const parsed = ipaddr.parse(address); - const range = parsed.range(); - // ipaddr marks public IPv4/IPv6 networks as "unicast" - return range !== "unicast"; - } catch { - return true; - } -}