From 871243f26944310d6ae43e244b7cf8989c6db0f7 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 7 May 2025 17:39:03 +0200 Subject: [PATCH 1/3] Add patch to implement VirtualFileSystem constructor --- patches/duckdb/virtualized_file_system.patch | 77 ++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 patches/duckdb/virtualized_file_system.patch diff --git a/patches/duckdb/virtualized_file_system.patch b/patches/duckdb/virtualized_file_system.patch new file mode 100644 index 000000000..40f0b47d3 --- /dev/null +++ b/patches/duckdb/virtualized_file_system.patch @@ -0,0 +1,77 @@ +diff --git a/src/common/virtual_file_system.cpp b/src/common/virtual_file_system.cpp +index 74892a4e05..04454b8895 100644 +--- a/src/common/virtual_file_system.cpp ++++ b/src/common/virtual_file_system.cpp +@@ -5,7 +5,7 @@ + + namespace duckdb { + +-VirtualFileSystem::VirtualFileSystem() : default_fs(FileSystem::CreateLocal()) { ++VirtualFileSystem::VirtualFileSystem(unique_ptr default) : default_fs(std::move(default)) { + VirtualFileSystem::RegisterSubSystem(FileCompressionType::GZIP, make_uniq()); + } + +diff --git a/src/include/duckdb/common/virtual_file_system.hpp b/src/include/duckdb/common/virtual_file_system.hpp +index 110ad04877..1c2d4c91d5 100644 +--- a/src/include/duckdb/common/virtual_file_system.hpp ++++ b/src/include/duckdb/common/virtual_file_system.hpp +@@ -17,7 +17,7 @@ namespace duckdb { + // bunch of wrappers to allow registering protocol handlers + class VirtualFileSystem : public FileSystem { + public: +- VirtualFileSystem(); ++ explicit VirtualFileSystem(unique_ptr inner_file_system); + + unique_ptr OpenFile(const string &path, FileOpenFlags flags, + optional_ptr opener = nullptr) override; +diff --git a/src/main/database.cpp b/src/main/database.cpp +index f51caaaee6..33230b68ab 100644 +--- a/src/main/database.cpp ++++ b/src/main/database.cpp +@@ -422,9 +422,9 @@ void DatabaseInstance::Configure(DBConfig &new_config, const char *database_path + } + config.extension_parameters = new_config.extension_parameters; + if (new_config.file_system) { +- config.file_system = std::move(new_config.file_system); ++ config.file_system = make_uniq(std::move(new_config.file_system)); + } else { +- config.file_system = make_uniq(); ++ config.file_system = make_uniq(FileSystem::CreateLocal()); + } + if (database_path && !config.options.enable_external_access) { + config.AddAllowedPath(database_path); +diff --git a/test/api/test_threads.cpp b/test/api/test_threads.cpp +index f63e36d41c..5effeb7963 100644 +--- a/test/api/test_threads.cpp ++++ b/test/api/test_threads.cpp +@@ -61,7 +61,7 @@ TEST_CASE("Test database maximum_threads argument", "[api]") { + // FIXME: not yet + { + DuckDB db(nullptr); +- auto file_system = make_uniq(); ++ auto file_system = make_uniq(FileSystem::CreateLocal()); + REQUIRE(db.NumberOfThreads() == DBConfig().GetSystemMaxThreads(*file_system)); + } + // but we can set another value +@@ -114,7 +114,7 @@ TEST_CASE("Test external threads", "[api]") { + REQUIRE(db.NumberOfThreads() == 13); + + con.Query("RESET threads"); +- auto file_system = make_uniq(); ++ auto file_system = make_uniq(FileSystem::CreateLocal()); + REQUIRE(config.options.maximum_threads == DBConfig().GetSystemMaxThreads(*file_system)); + REQUIRE(db.NumberOfThreads() == DBConfig().GetSystemMaxThreads(*file_system)); + } +diff --git a/test/sqlite/sqllogic_command.cpp b/test/sqlite/sqllogic_command.cpp +index 6d4fcca0c8..eda4cb0794 100644 +--- a/test/sqlite/sqllogic_command.cpp ++++ b/test/sqlite/sqllogic_command.cpp +@@ -482,7 +482,7 @@ void Statement::ExecuteInternal(ExecuteContext &context) const { + } + + void UnzipCommand::ExecuteInternal(ExecuteContext &context) const { +- VirtualFileSystem vfs; ++ VirtualFileSystem vfs(FileSystem::CreateLocal()); + + // input + FileOpenFlags in_flags(FileFlags::FILE_FLAGS_READ); From 0d854b2305626e029e8b4c28d0e230ca7416a87b Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 7 May 2025 15:35:00 +0200 Subject: [PATCH 2/3] Actually rely on VirtualFileSystem at the duckdb-wasm level Currently via patch to duckdb/duckdb, to be upstreamed --- lib/src/webdb.cc | 3 ++- packages/duckdb-wasm/test/httpfs_test.ts | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/src/webdb.cc b/lib/src/webdb.cc index 345c9ac67..f1cdd8cde 100644 --- a/lib/src/webdb.cc +++ b/lib/src/webdb.cc @@ -36,6 +36,7 @@ #include "duckdb/common/types/data_chunk.hpp" #include "duckdb/common/types/vector.hpp" #include "duckdb/common/types/vector_buffer.hpp" +#include "duckdb/common/virtual_file_system.hpp" #include "duckdb/main/query_result.hpp" #include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/parser.hpp" @@ -866,7 +867,7 @@ arrow::Status WebDB::Open(std::string_view args_json) { auto buffered_fs_ptr = buffered_fs.get(); duckdb::DBConfig db_config; - db_config.file_system = std::move(buffered_fs); + db_config.file_system = std::move(make_uniq(std::move(buffered_fs))); db_config.options.allow_unsigned_extensions = config_->allow_unsigned_extensions; db_config.options.maximum_threads = config_->maximum_threads; db_config.options.use_temporary_directory = false; diff --git a/packages/duckdb-wasm/test/httpfs_test.ts b/packages/duckdb-wasm/test/httpfs_test.ts index 4ffe2ac0b..6e2f719cd 100644 --- a/packages/duckdb-wasm/test/httpfs_test.ts +++ b/packages/duckdb-wasm/test/httpfs_test.ts @@ -257,6 +257,12 @@ export function testHTTPFSAsync( expect(BigInt(results.getChildAt(2)?.get(2))).toEqual(BigInt(9n)); }); + it('can fetch over https csv.gz', async () => { + await conn!.query( + `select * from "https://raw.githubusercontent.com/duckdb/duckdb/v1.2.2/data/csv/test_apple_financial.csv.gz";`, + ); + }); + it('can read and write csv file from S3 with correct auth credentials', async () => { let data = await resolveData('/uni/studenten.parquet'); await setAwsConfig(conn!); From d2a1c69f0e7701baa8ac9da6ddf3b3da225da809 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 7 May 2025 19:28:34 +0200 Subject: [PATCH 3/3] fix patch --- patches/duckdb/virtualized_file_system.patch | 59 ++------------------ 1 file changed, 4 insertions(+), 55 deletions(-) diff --git a/patches/duckdb/virtualized_file_system.patch b/patches/duckdb/virtualized_file_system.patch index 40f0b47d3..028fc3709 100644 --- a/patches/duckdb/virtualized_file_system.patch +++ b/patches/duckdb/virtualized_file_system.patch @@ -1,5 +1,5 @@ diff --git a/src/common/virtual_file_system.cpp b/src/common/virtual_file_system.cpp -index 74892a4e05..04454b8895 100644 +index 74892a4e05..c3d1c4f333 100644 --- a/src/common/virtual_file_system.cpp +++ b/src/common/virtual_file_system.cpp @@ -5,7 +5,7 @@ @@ -7,12 +7,12 @@ index 74892a4e05..04454b8895 100644 namespace duckdb { -VirtualFileSystem::VirtualFileSystem() : default_fs(FileSystem::CreateLocal()) { -+VirtualFileSystem::VirtualFileSystem(unique_ptr default) : default_fs(std::move(default)) { ++VirtualFileSystem::VirtualFileSystem(unique_ptr inner) : default_fs(std::move(inner)) { VirtualFileSystem::RegisterSubSystem(FileCompressionType::GZIP, make_uniq()); } diff --git a/src/include/duckdb/common/virtual_file_system.hpp b/src/include/duckdb/common/virtual_file_system.hpp -index 110ad04877..1c2d4c91d5 100644 +index 110ad04877..30a7eb29d3 100644 --- a/src/include/duckdb/common/virtual_file_system.hpp +++ b/src/include/duckdb/common/virtual_file_system.hpp @@ -17,7 +17,7 @@ namespace duckdb { @@ -20,58 +20,7 @@ index 110ad04877..1c2d4c91d5 100644 class VirtualFileSystem : public FileSystem { public: - VirtualFileSystem(); -+ explicit VirtualFileSystem(unique_ptr inner_file_system); ++ VirtualFileSystem(unique_ptr inner_file_system = nullptr); unique_ptr OpenFile(const string &path, FileOpenFlags flags, optional_ptr opener = nullptr) override; -diff --git a/src/main/database.cpp b/src/main/database.cpp -index f51caaaee6..33230b68ab 100644 ---- a/src/main/database.cpp -+++ b/src/main/database.cpp -@@ -422,9 +422,9 @@ void DatabaseInstance::Configure(DBConfig &new_config, const char *database_path - } - config.extension_parameters = new_config.extension_parameters; - if (new_config.file_system) { -- config.file_system = std::move(new_config.file_system); -+ config.file_system = make_uniq(std::move(new_config.file_system)); - } else { -- config.file_system = make_uniq(); -+ config.file_system = make_uniq(FileSystem::CreateLocal()); - } - if (database_path && !config.options.enable_external_access) { - config.AddAllowedPath(database_path); -diff --git a/test/api/test_threads.cpp b/test/api/test_threads.cpp -index f63e36d41c..5effeb7963 100644 ---- a/test/api/test_threads.cpp -+++ b/test/api/test_threads.cpp -@@ -61,7 +61,7 @@ TEST_CASE("Test database maximum_threads argument", "[api]") { - // FIXME: not yet - { - DuckDB db(nullptr); -- auto file_system = make_uniq(); -+ auto file_system = make_uniq(FileSystem::CreateLocal()); - REQUIRE(db.NumberOfThreads() == DBConfig().GetSystemMaxThreads(*file_system)); - } - // but we can set another value -@@ -114,7 +114,7 @@ TEST_CASE("Test external threads", "[api]") { - REQUIRE(db.NumberOfThreads() == 13); - - con.Query("RESET threads"); -- auto file_system = make_uniq(); -+ auto file_system = make_uniq(FileSystem::CreateLocal()); - REQUIRE(config.options.maximum_threads == DBConfig().GetSystemMaxThreads(*file_system)); - REQUIRE(db.NumberOfThreads() == DBConfig().GetSystemMaxThreads(*file_system)); - } -diff --git a/test/sqlite/sqllogic_command.cpp b/test/sqlite/sqllogic_command.cpp -index 6d4fcca0c8..eda4cb0794 100644 ---- a/test/sqlite/sqllogic_command.cpp -+++ b/test/sqlite/sqllogic_command.cpp -@@ -482,7 +482,7 @@ void Statement::ExecuteInternal(ExecuteContext &context) const { - } - - void UnzipCommand::ExecuteInternal(ExecuteContext &context) const { -- VirtualFileSystem vfs; -+ VirtualFileSystem vfs(FileSystem::CreateLocal()); - - // input - FileOpenFlags in_flags(FileFlags::FILE_FLAGS_READ);