From 0b6bf3aefda06ab2b94d1d5a693ec31a769129b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Victor=20Mu=C5=A1tar?= Date: Mon, 24 Nov 2025 14:47:07 +0100 Subject: [PATCH 1/4] Preserve original content-type in fetch-url API The API now returns the content-type from the fetched response instead of always using text/plain. This allows clients to receive the correct content type for the requested resource. --- src/routes/api/fetch-url/+server.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/routes/api/fetch-url/+server.ts b/src/routes/api/fetch-url/+server.ts index 85cbe1d7c06..d680c627911 100644 --- a/src/routes/api/fetch-url/+server.ts +++ b/src/routes/api/fetch-url/+server.ts @@ -51,8 +51,7 @@ export async function GET({ url }) { } // Stream the response back - // Always return as text/plain to prevent any HTML/JS execution - const contentType = "text/plain; charset=utf-8"; + const contentType = response.headers.get("content-type") || "application/octet-stream"; const contentDisposition = response.headers.get("content-disposition"); const headers: HeadersInit = { From fd97719c20f2e2cc11eb84f5489842e6e4e50154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Victor=20Mu=C5=A1tar?= Date: Mon, 24 Nov 2025 15:26:48 +0100 Subject: [PATCH 2/4] Improve MIME type handling for URL file fetches Adds a utility for safer MIME type inference and selection, using file extensions and forwarded headers. Updates file fetch logic in UrlFetchModal and loadAttachmentsFromUrls to use the new helper, improving filename extraction and MIME type accuracy. The API endpoint now always returns 'text/plain' for safety, exposing the original content type in a custom header. 
--- src/lib/components/chat/UrlFetchModal.svelte | 32 +++++++---- src/lib/utils/loadAttachmentsFromUrls.ts | 21 ++++++-- src/lib/utils/mime.ts | 56 ++++++++++++++++++++ src/routes/api/fetch-url/+server.ts | 7 ++- 4 files changed, 101 insertions(+), 15 deletions(-) create mode 100644 src/lib/utils/mime.ts diff --git a/src/lib/components/chat/UrlFetchModal.svelte b/src/lib/components/chat/UrlFetchModal.svelte index 3dd4ca6c059..67f5424cae2 100644 --- a/src/lib/components/chat/UrlFetchModal.svelte +++ b/src/lib/components/chat/UrlFetchModal.svelte @@ -2,6 +2,7 @@ import Modal from "../Modal.svelte"; import { base } from "$app/paths"; import { tick } from "svelte"; + import { pickSafeMime } from "$lib/utils/mime"; interface Props { open?: boolean; @@ -81,23 +82,36 @@ const txt = await res.text(); throw new Error(txt || `Failed to fetch (${res.status})`); } + const forwardedType = + res.headers.get("x-forwarded-content-type") || res.headers.get("x-original-content-type"); const blob = await res.blob(); + const mimeType = pickSafeMime(forwardedType, blob.type, trimmed); // Optional client-side mime filter (same wildcard semantics as dropzone) - if (acceptMimeTypes.length > 0 && blob.type && !matchesAllowed(blob.type, acceptMimeTypes)) { + if (acceptMimeTypes.length > 0 && mimeType && !matchesAllowed(mimeType, acceptMimeTypes)) { throw new Error("File type not allowed."); } const disp = res.headers.get("content-disposition"); - let filename = "attachment"; - const match = disp?.match(/filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/); - if (match && match[1]) filename = match[1].replace(/['"]/g, ""); - else { + const filename = (() => { + const filenameStar = disp?.match(/filename\*=UTF-8''([^;]+)/i)?.[1]; + if (filenameStar) { + const cleaned = filenameStar.trim().replace(/['"]/g, ""); + try { + return decodeURIComponent(cleaned); + } catch { + return cleaned; + } + } + const filenameMatch = disp?.match(/filename="?([^";]+)"?/i)?.[1]; + if (filenameMatch) return 
filenameMatch.trim(); try { const u = new URL(trimmed); const last = u.pathname.split("/").pop() || "attachment"; - filename = decodeURIComponent(last); - } catch {} - } - const file = new File([blob], filename, { type: blob.type || "application/octet-stream" }); + return decodeURIComponent(last); + } catch { + return "attachment"; + } + })(); + const file = new File([blob], filename, { type: mimeType }); onfiles?.([file]); close(); } catch (e) { diff --git a/src/lib/utils/loadAttachmentsFromUrls.ts b/src/lib/utils/loadAttachmentsFromUrls.ts index c56f2f64ed6..36ca4f0f484 100644 --- a/src/lib/utils/loadAttachmentsFromUrls.ts +++ b/src/lib/utils/loadAttachmentsFromUrls.ts @@ -1,4 +1,5 @@ import { base } from "$app/paths"; +import { pickSafeMime } from "$lib/utils/mime"; export interface AttachmentLoadResult { files: File[]; @@ -31,10 +32,18 @@ function parseAttachmentUrls(searchParams: URLSearchParams): string[] { function extractFilename(url: string, contentDisposition?: string | null): string { // Try to get filename from Content-Disposition header if (contentDisposition) { - const match = contentDisposition.match(/filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/); - if (match && match[1]) { - return match[1].replace(/['"]/g, ""); + const filenameStar = contentDisposition.match(/filename\*=UTF-8''([^;]+)/i)?.[1]; + if (filenameStar) { + const cleaned = filenameStar.trim().replace(/['"]/g, ""); + try { + return decodeURIComponent(cleaned); + } catch { + return cleaned; + } } + + const match = contentDisposition.match(/filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/); + if (match && match[1]) return match[1].replace(/['"]/g, ""); } // Fallback: extract from URL @@ -82,13 +91,17 @@ export async function loadAttachmentsFromUrls( return; } + const forwardedType = + response.headers.get("x-forwarded-content-type") || + response.headers.get("x-original-content-type"); const blob = await response.blob(); + const mimeType = pickSafeMime(forwardedType, blob.type, url); const 
contentDisposition = response.headers.get("content-disposition"); const filename = extractFilename(url, contentDisposition); // Create File object const file = new File([blob], filename, { - type: blob.type || "application/octet-stream", + type: mimeType, }); files.push(file); diff --git a/src/lib/utils/mime.ts b/src/lib/utils/mime.ts new file mode 100644 index 00000000000..0c4c3e4c61e --- /dev/null +++ b/src/lib/utils/mime.ts @@ -0,0 +1,56 @@ +// Lightweight MIME helpers to avoid new dependencies. + +const EXTENSION_TO_MIME: Record<string, string> = { + png: "image/png", + jpg: "image/jpeg", + jpe: "image/jpeg", + jpeg: "image/jpeg", + gif: "image/gif", + webp: "image/webp", + svg: "image/svg+xml", + pdf: "application/pdf", + txt: "text/plain", + csv: "text/csv", + json: "application/json", + mp3: "audio/mpeg", + wav: "audio/wav", + ogg: "audio/ogg", + mp4: "video/mp4", + mov: "video/quicktime", + webm: "video/webm", + zip: "application/zip", + gz: "application/gzip", + tgz: "application/gzip", + tar: "application/x-tar", + html: "text/html", + htm: "text/html", + md: "text/markdown", +}; + +export function guessMimeFromUrl(url: string): string | undefined { + try { + const pathname = new URL(url).pathname; + const ext = pathname.split(".").pop()?.toLowerCase(); + if (ext && EXTENSION_TO_MIME[ext]) return EXTENSION_TO_MIME[ext]; + } catch { + /* ignore */ + } + return undefined; +} + +export function pickSafeMime( + forwardedType: string | null, + blobType: string | undefined, + url: string +): string { + const inferred = guessMimeFromUrl(url); + if (forwardedType) return forwardedType; + if ( + inferred && + (!blobType || blobType === "application/octet-stream" || blobType.startsWith("text/plain")) + ) { + return inferred; + } + if (blobType) return blobType; + return inferred || "application/octet-stream"; +} diff --git a/src/routes/api/fetch-url/+server.ts b/src/routes/api/fetch-url/+server.ts index d680c627911..cfb83bb67e8 100644 --- a/src/routes/api/fetch-url/+server.ts +++
b/src/routes/api/fetch-url/+server.ts @@ -51,11 +51,14 @@ export async function GET({ url }) { } // Stream the response back - const contentType = response.headers.get("content-type") || "application/octet-stream"; + const originalContentType = response.headers.get("content-type") || "application/octet-stream"; + // Send as text/plain for safety; expose the original type via secondary header + const safeContentType = "text/plain; charset=utf-8"; const contentDisposition = response.headers.get("content-disposition"); const headers: HeadersInit = { - "Content-Type": contentType, + "Content-Type": safeContentType, + "X-Forwarded-Content-Type": originalContentType, "Cache-Control": "public, max-age=3600", ...(contentDisposition ? { "Content-Disposition": contentDisposition } : {}), ...SECURITY_HEADERS, From f6df21a0c03d6c344593f0832a368594086cb643 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Victor=20Mu=C5=A1tar?= Date: Mon, 24 Nov 2025 15:55:28 +0100 Subject: [PATCH 3/4] Update src/lib/components/chat/UrlFetchModal.svelte Co-authored-by: Eliott C. 
--- src/lib/components/chat/UrlFetchModal.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/chat/UrlFetchModal.svelte b/src/lib/components/chat/UrlFetchModal.svelte index 67f5424cae2..634984def5f 100644 --- a/src/lib/components/chat/UrlFetchModal.svelte +++ b/src/lib/components/chat/UrlFetchModal.svelte @@ -83,7 +83,7 @@ throw new Error(txt || `Failed to fetch (${res.status})`); } const forwardedType = - res.headers.get("x-forwarded-content-type") || res.headers.get("x-original-content-type"); + res.headers.get("x-forwarded-content-type"); const blob = await res.blob(); const mimeType = pickSafeMime(forwardedType, blob.type, trimmed); // Optional client-side mime filter (same wildcard semantics as dropzone) From eb28e6b60dfa99cd1318a85e73054b74e54c935c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Victor=20Mu=C5=A1tar?= Date: Mon, 24 Nov 2025 15:55:33 +0100 Subject: [PATCH 4/4] Update src/lib/utils/loadAttachmentsFromUrls.ts Co-authored-by: Eliott C. --- src/lib/utils/loadAttachmentsFromUrls.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib/utils/loadAttachmentsFromUrls.ts b/src/lib/utils/loadAttachmentsFromUrls.ts index 36ca4f0f484..da6e11d41d1 100644 --- a/src/lib/utils/loadAttachmentsFromUrls.ts +++ b/src/lib/utils/loadAttachmentsFromUrls.ts @@ -92,8 +92,7 @@ export async function loadAttachmentsFromUrls( } const forwardedType = - response.headers.get("x-forwarded-content-type") || - response.headers.get("x-original-content-type"); + response.headers.get("x-forwarded-content-type"); const blob = await response.blob(); const mimeType = pickSafeMime(forwardedType, blob.type, url); const contentDisposition = response.headers.get("content-disposition");