diff --git a/src/lib/components/chat/UrlFetchModal.svelte b/src/lib/components/chat/UrlFetchModal.svelte index 3dd4ca6c059..634984def5f 100644 --- a/src/lib/components/chat/UrlFetchModal.svelte +++ b/src/lib/components/chat/UrlFetchModal.svelte @@ -2,6 +2,7 @@ import Modal from "../Modal.svelte"; import { base } from "$app/paths"; import { tick } from "svelte"; + import { pickSafeMime } from "$lib/utils/mime"; interface Props { open?: boolean; @@ -81,23 +82,36 @@ const txt = await res.text(); throw new Error(txt || `Failed to fetch (${res.status})`); } + const forwardedType = + res.headers.get("x-forwarded-content-type"); const blob = await res.blob(); + const mimeType = pickSafeMime(forwardedType, blob.type, trimmed); // Optional client-side mime filter (same wildcard semantics as dropzone) - if (acceptMimeTypes.length > 0 && blob.type && !matchesAllowed(blob.type, acceptMimeTypes)) { + if (acceptMimeTypes.length > 0 && mimeType && !matchesAllowed(mimeType, acceptMimeTypes)) { throw new Error("File type not allowed."); } const disp = res.headers.get("content-disposition"); - let filename = "attachment"; - const match = disp?.match(/filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/); - if (match && match[1]) filename = match[1].replace(/['"]/g, ""); - else { + const filename = (() => { + const filenameStar = disp?.match(/filename\*=UTF-8''([^;]+)/i)?.[1]; + if (filenameStar) { + const cleaned = filenameStar.trim().replace(/['"]/g, ""); + try { + return decodeURIComponent(cleaned); + } catch { + return cleaned; + } + } + const filenameMatch = disp?.match(/filename="?([^";]+)"?/i)?.[1]; + if (filenameMatch) return filenameMatch.trim(); try { const u = new URL(trimmed); const last = u.pathname.split("/").pop() || "attachment"; - filename = decodeURIComponent(last); - } catch {} - } - const file = new File([blob], filename, { type: blob.type || "application/octet-stream" }); + return decodeURIComponent(last); + } catch { + return "attachment"; + } + })(); + const file = new 
File([blob], filename, { type: mimeType }); onfiles?.([file]); close(); } catch (e) { diff --git a/src/lib/utils/loadAttachmentsFromUrls.ts b/src/lib/utils/loadAttachmentsFromUrls.ts index c56f2f64ed6..da6e11d41d1 100644 --- a/src/lib/utils/loadAttachmentsFromUrls.ts +++ b/src/lib/utils/loadAttachmentsFromUrls.ts @@ -1,4 +1,5 @@ import { base } from "$app/paths"; +import { pickSafeMime } from "$lib/utils/mime"; export interface AttachmentLoadResult { files: File[]; @@ -31,10 +32,18 @@ function parseAttachmentUrls(searchParams: URLSearchParams): string[] { function extractFilename(url: string, contentDisposition?: string | null): string { // Try to get filename from Content-Disposition header if (contentDisposition) { - const match = contentDisposition.match(/filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/); - if (match && match[1]) { - return match[1].replace(/['"]/g, ""); + const filenameStar = contentDisposition.match(/filename\*=UTF-8''([^;]+)/i)?.[1]; + if (filenameStar) { + const cleaned = filenameStar.trim().replace(/['"]/g, ""); + try { + return decodeURIComponent(cleaned); + } catch { + return cleaned; + } } + + const match = contentDisposition.match(/filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/); + if (match && match[1]) return match[1].replace(/['"]/g, ""); } // Fallback: extract from URL @@ -82,13 +91,16 @@ export async function loadAttachmentsFromUrls( return; } + const forwardedType = + response.headers.get("x-forwarded-content-type"); const blob = await response.blob(); + const mimeType = pickSafeMime(forwardedType, blob.type, url); const contentDisposition = response.headers.get("content-disposition"); const filename = extractFilename(url, contentDisposition); // Create File object const file = new File([blob], filename, { - type: blob.type || "application/octet-stream", + type: mimeType, }); files.push(file); diff --git a/src/lib/utils/mime.ts b/src/lib/utils/mime.ts new file mode 100644 index 00000000000..0c4c3e4c61e --- /dev/null +++ b/src/lib/utils/mime.ts @@ 
// [patch residue, kept verbatim] -0,0 +1,56 @@
// Lightweight MIME helpers to avoid new dependencies.

/** Generic "unknown binary" type; it says nothing about the actual payload. */
const GENERIC_MIME = "application/octet-stream";

/**
 * Lower-case file extension → MIME type.
 *
 * Kept in a Map rather than a plain object: the key comes from a user-supplied
 * URL, and a plain-object lookup with "constructor" or "__proto__" would
 * resolve to an inherited Object.prototype member instead of `undefined`.
 */
const EXTENSION_TO_MIME: ReadonlyMap<string, string> = new Map(
	Object.entries({
		png: "image/png",
		jpg: "image/jpeg",
		jpe: "image/jpeg",
		jpeg: "image/jpeg",
		gif: "image/gif",
		webp: "image/webp",
		svg: "image/svg+xml",
		pdf: "application/pdf",
		txt: "text/plain",
		csv: "text/csv",
		json: "application/json",
		mp3: "audio/mpeg",
		wav: "audio/wav",
		ogg: "audio/ogg",
		mp4: "video/mp4",
		mov: "video/quicktime",
		webm: "video/webm",
		zip: "application/zip",
		gz: "application/gzip",
		tgz: "application/gzip",
		tar: "application/x-tar",
		html: "text/html",
		htm: "text/html",
		md: "text/markdown",
	})
);

/** True when `type` (parameters such as `; charset=…` ignored) is the generic binary type. */
function isGenericMime(type: string): boolean {
	const semicolon = type.indexOf(";");
	const essence = (semicolon === -1 ? type : type.slice(0, semicolon)).trim().toLowerCase();
	return essence === GENERIC_MIME;
}

/**
 * Guess a MIME type from the file extension of an absolute URL's path.
 *
 * @param url Absolute URL; the query string and fragment are ignored.
 * @returns The mapped MIME type, or `undefined` when the URL cannot be parsed,
 *          the last path segment has no extension, or the extension is unknown.
 */
export function guessMimeFromUrl(url: string): string | undefined {
	let pathname: string;
	try {
		pathname = new URL(url).pathname;
	} catch {
		// Relative or malformed URL: nothing to infer from.
		return undefined;
	}

	// Only the last path segment can carry a file extension; a dot in an
	// earlier segment (e.g. "/v1.2/download") is not one.
	const lastSegment = pathname.slice(pathname.lastIndexOf("/") + 1);
	const dot = lastSegment.lastIndexOf(".");
	if (dot === -1) return undefined;

	return EXTENSION_TO_MIME.get(lastSegment.slice(dot + 1).toLowerCase());
}

/**
 * Choose the MIME type to attach to a fetched file.
 *
 * Precedence:
 *  1. `forwardedType` when it names a specific type (the upstream Content-Type
 *     relayed by the fetch proxy in `X-Forwarded-Content-Type`).
 *  2. If `forwardedType` is present but generic (`application/octet-stream`,
 *     which the proxy also sends when upstream had no Content-Type): the type
 *     inferred from the URL extension, else the forwarded generic type. The
 *     blob type is skipped here because the proxy forces it to `text/plain`.
 *  3. Without a forwarded type: the inferred type when the blob type is
 *     missing, generic, or `text/plain`; otherwise the blob type.
 *  4. `application/octet-stream` as the last resort.
 *
 * @param forwardedType Value of the `x-forwarded-content-type` header, or `null` if absent.
 * @param blobType      `Blob.type` of the downloaded body (may be empty).
 * @param url           URL the file was fetched from, used for extension inference.
 * @returns A non-empty MIME type string.
 */
export function pickSafeMime(
	forwardedType: string | null,
	blobType: string | undefined,
	url: string
): string {
	const forwarded = forwardedType?.trim();
	if (forwarded && !isGenericMime(forwarded)) return forwarded;

	const inferred = guessMimeFromUrl(url);

	if (forwarded) {
		// Upstream said "unknown binary": the extension is the better hint.
		return inferred ?? forwarded;
	}

	if (
		inferred &&
		(!blobType || blobType === GENERIC_MIME || blobType.startsWith("text/plain"))
	) {
		return inferred;
	}
	return blobType || GENERIC_MIME;
}

// [patch residue, kept verbatim — start of the next file's diff, which wraps onto this line]
// diff --git a/src/routes/api/fetch-url/+server.ts b/src/routes/api/fetch-url/+server.ts index 85cbe1d7c06..cfb83bb67e8 100644 --- a/src/routes/api/fetch-url/+server.ts +++ b/src/routes/api/fetch-url/+server.ts @@ -51,12 +51,14 @@ export async function GET({ url }) { } // Stream the response back - // Always return as text/plain to prevent any HTML/JS execution - const contentType = "text/plain; charset=utf-8"; + const originalContentType = response.headers.get("content-type") || "application/octet-stream"; + // Send as text/plain for safety; expose the original type via secondary header + const
safeContentType = "text/plain; charset=utf-8"; const contentDisposition = response.headers.get("content-disposition"); const headers: HeadersInit = { - "Content-Type": contentType, + "Content-Type": safeContentType, + "X-Forwarded-Content-Type": originalContentType, "Cache-Control": "public, max-age=3600", ...(contentDisposition ? { "Content-Disposition": contentDisposition } : {}), ...SECURITY_HEADERS,