From 6a38b7638fa2f3b25a8f5fe5da4e05be8e8f50c1 Mon Sep 17 00:00:00 2001 From: jheer Date: Thu, 3 Jul 2025 20:05:07 +0200 Subject: [PATCH] feat: Add forceFullHttpReads file flag. --- lib/include/duckdb/web/config.h | 8 +++++++- lib/src/config.cc | 4 ++++ lib/src/io/web_filesystem.cc | 8 +++++++- packages/duckdb-wasm/src/bindings/config.ts | 6 +++++- packages/duckdb-wasm/src/bindings/runtime.ts | 2 ++ packages/duckdb-wasm/src/bindings/runtime_browser.ts | 6 +++--- 6 files changed, 28 insertions(+), 6 deletions(-) diff --git a/lib/include/duckdb/web/config.h b/lib/include/duckdb/web/config.h index ddb068492..935c71e95 100644 --- a/lib/include/duckdb/web/config.h +++ b/lib/include/duckdb/web/config.h @@ -71,6 +71,8 @@ struct DuckDBConfigOptions { struct FileSystemConfig { /// Allow falling back to full HTTP reads if the server does not support range requests std::optional allow_full_http_reads = std::nullopt; + /// Force full HTTP reads, suppressing use of range requests + std::optional force_full_http_reads = std::nullopt; std::optional reliable_head_requests = std::nullopt; }; @@ -91,7 +93,11 @@ struct WebDBConfig { .cast_decimal_to_double = std::nullopt, }; /// The filesystem - FileSystemConfig filesystem = {.allow_full_http_reads = std::nullopt, .reliable_head_requests = std::nullopt}; + FileSystemConfig filesystem = { + .allow_full_http_reads = std::nullopt, + .force_full_http_reads = std::nullopt, + .reliable_head_requests = std::nullopt, + }; /// These options are fetched from DuckDB DuckDBConfigOptions duckdb_config_options = { diff --git a/lib/src/config.cc b/lib/src/config.cc index 8baed3636..155bca850 100644 --- a/lib/src/config.cc +++ b/lib/src/config.cc @@ -49,6 +49,7 @@ WebDBConfig WebDBConfig::ReadFrom(std::string_view args_json) { .filesystem = FileSystemConfig{ .allow_full_http_reads = std::nullopt, + .force_full_http_reads = std::nullopt, .reliable_head_requests = std::nullopt, }, .duckdb_config_options = @@ -106,6 +107,9 @@ WebDBConfig 
WebDBConfig::ReadFrom(std::string_view args_json) { if (fs.HasMember("allowFullHTTPReads") && fs["allowFullHTTPReads"].IsBool()) { config.filesystem.allow_full_http_reads = fs["allowFullHTTPReads"].GetBool(); } + if (fs.HasMember("forceFullHTTPReads") && fs["forceFullHTTPReads"].IsBool()) { + config.filesystem.force_full_http_reads = fs["forceFullHTTPReads"].GetBool(); + } if (fs.HasMember("reliableHeadRequests") && fs["reliableHeadRequests"].IsBool()) { config.filesystem.reliable_head_requests = fs["reliableHeadRequests"].GetBool(); } diff --git a/lib/src/io/web_filesystem.cc b/lib/src/io/web_filesystem.cc index 90391c100..d06b1a2a2 100644 --- a/lib/src/io/web_filesystem.cc +++ b/lib/src/io/web_filesystem.cc @@ -325,7 +325,10 @@ rapidjson::Value WebFileSystem::WebFile::WriteInfo(rapidjson::Document &doc) con filesystem_.config_->filesystem.allow_full_http_reads.value_or(true)) { value.AddMember("allowFullHttpReads", true, allocator); } - + if ((data_protocol_ == DataProtocol::HTTP || data_protocol_ == DataProtocol::S3) && + filesystem_.config_->filesystem.force_full_http_reads.value_or(false)) { + value.AddMember("forceFullHttpReads", true, allocator); + } if ((data_protocol_ == DataProtocol::HTTP || data_protocol_ == DataProtocol::S3)) { if (filesystem_.config_->duckdb_config_options.reliable_head_requests) value.AddMember("reliableHeadRequests", true, allocator); @@ -518,6 +521,9 @@ rapidjson::Value WebFileSystem::WriteGlobalFileInfo(rapidjson::Document &doc, ui if (config_->filesystem.allow_full_http_reads.value_or(true)) { value.AddMember("allowFullHttpReads", true, allocator); } + if (config_->filesystem.force_full_http_reads.value_or(false)) { + value.AddMember("forceFullHttpReads", true, allocator); + } if (config_->filesystem.reliable_head_requests.value_or(true)) { value.AddMember("reliableHeadRequests", true, allocator); } else { diff --git a/packages/duckdb-wasm/src/bindings/config.ts b/packages/duckdb-wasm/src/bindings/config.ts index 
cd7d5fae1..52a4befbc 100644 --- a/packages/duckdb-wasm/src/bindings/config.ts +++ b/packages/duckdb-wasm/src/bindings/config.ts @@ -22,11 +22,15 @@ export interface DuckDBQueryConfig { } export interface DuckDBFilesystemConfig { + reliableHeadRequests?: boolean; /** * Allow falling back to full HTTP reads if the server does not support range requests. */ - reliableHeadRequests?: boolean; allowFullHTTPReads?: boolean; + /** + * Force use of full HTTP reads, suppressing range requests. + */ + forceFullHTTPReads?: boolean; } export enum DuckDBAccessMode { diff --git a/packages/duckdb-wasm/src/bindings/runtime.ts b/packages/duckdb-wasm/src/bindings/runtime.ts index d8e3f1357..e73e71dc7 100644 --- a/packages/duckdb-wasm/src/bindings/runtime.ts +++ b/packages/duckdb-wasm/src/bindings/runtime.ts @@ -83,6 +83,7 @@ export interface DuckDBFileInfo { dataUrl: string | null; reliableHeadRequests?: boolean; allowFullHttpReads?: boolean; + forceFullHttpReads?: boolean; s3Config?: S3Config; } @@ -91,6 +92,7 @@ export interface DuckDBGlobalFileInfo { cacheEpoch: number; reliableHeadRequests?: boolean; allowFullHttpReads?: boolean; + forceFullHttpReads?: boolean; s3Config?: S3Config; } diff --git a/packages/duckdb-wasm/src/bindings/runtime_browser.ts b/packages/duckdb-wasm/src/bindings/runtime_browser.ts index 96cae4024..8a14d9403 100644 --- a/packages/duckdb-wasm/src/bindings/runtime_browser.ts +++ b/packages/duckdb-wasm/src/bindings/runtime_browser.ts @@ -244,7 +244,7 @@ export const BROWSER_RUNTIME: DuckDBRuntime & { // Supports ranges? let contentLength = null; let error: any | null = null; - if (file.reliableHeadRequests || !file.allowFullHttpReads) { + if (!file.forceFullHttpReads && (file.reliableHeadRequests || !file.allowFullHttpReads)) { try { // Send a dummy HEAD request with range protocol // -> good IFF status is 206 and contentLenght is present @@ -278,7 +278,7 @@ export const BROWSER_RUNTIME: DuckDBRuntime & { // Try to fallback to full read? 
if (file.allowFullHttpReads) { - { + if (!file.forceFullHttpReads) { // 2. Send a dummy GET range request querying the first byte of the file // -> good IFF status is 206 and contentLenght2 is 1 // -> otherwise, iff 200 and contentLenght2 == contentLenght @@ -354,8 +354,8 @@ export const BROWSER_RUNTIME: DuckDBRuntime & { mod.HEAPF64[(result >> 3) + 2] = +modification_time; return result; } + console.warn(`falling back to full HTTP read for: ${file.dataUrl}`); } - console.warn(`falling back to full HTTP read for: ${file.dataUrl}`); // 3. Send non-range request const xhr = new XMLHttpRequest(); if (file.dataProtocol == DuckDBDataProtocol.S3) {