From 057df022b8ef74123fd50a3169a304e4fb627abf Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 25 Jun 2025 09:55:09 +0200 Subject: [PATCH 1/6] Add scaffolding for duckdb-wasm's HTTP stack --- lib/CMakeLists.txt | 1 + lib/include/duckdb/web/http_wasm.h | 44 +++ lib/src/http_wasm.cc | 437 +++++++++++++++++++++++++++++ lib/src/webdb.cc | 6 + 4 files changed, 488 insertions(+) create mode 100644 lib/include/duckdb/web/http_wasm.h create mode 100644 lib/src/http_wasm.cc diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index ac20b25d8..edbce727c 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -217,6 +217,7 @@ add_library( ${CMAKE_SOURCE_DIR}/src/arrow_casts.cc ${CMAKE_SOURCE_DIR}/src/arrow_insert_options.cc ${CMAKE_SOURCE_DIR}/src/arrow_stream_buffer.cc + ${CMAKE_SOURCE_DIR}/src/http_wasm.cc ${CMAKE_SOURCE_DIR}/src/arrow_type_mapping.cc ${CMAKE_SOURCE_DIR}/src/config.cc ${CMAKE_SOURCE_DIR}/src/csv_insert_options.cc diff --git a/lib/include/duckdb/web/http_wasm.h b/lib/include/duckdb/web/http_wasm.h new file mode 100644 index 000000000..a9dea3f2e --- /dev/null +++ b/lib/include/duckdb/web/http_wasm.h @@ -0,0 +1,44 @@ +#include "duckdb/common/http_util.hpp" + +namespace duckdb { + +class HTTPLogger; +class FileOpener; +struct FileOpenerInfo; +class HTTPState; + +struct HTTPFSParams : public HTTPParams { + HTTPFSParams(HTTPUtil &http_util) : HTTPParams(http_util) { + } + + static constexpr bool DEFAULT_ENABLE_SERVER_CERT_VERIFICATION = false; + static constexpr uint64_t DEFAULT_HF_MAX_PER_PAGE = 0; + static constexpr bool DEFAULT_FORCE_DOWNLOAD = false; + + bool force_download = DEFAULT_FORCE_DOWNLOAD; + bool enable_server_cert_verification = DEFAULT_ENABLE_SERVER_CERT_VERIFICATION; + idx_t hf_max_per_page = DEFAULT_HF_MAX_PER_PAGE; + string ca_cert_file; + string bearer_token; + shared_ptr state; +}; + +class HTTPWasmUtil : public HTTPUtil { +public: + unique_ptr InitializeParameters(optional_ptr opener, + optional_ptr info) override { + std::cout 
<< "InitializeParameters\n"; + return make_uniq(*this); + } + unique_ptr InitializeClient(HTTPParams &http_params, const string &proto_host_port) override; + + //static unordered_map ParseGetParameters(const string &text); + + string GetName() const override; +}; + + + + +} // namespace duckdb + diff --git a/lib/src/http_wasm.cc b/lib/src/http_wasm.cc new file mode 100644 index 000000000..1fa12a39d --- /dev/null +++ b/lib/src/http_wasm.cc @@ -0,0 +1,437 @@ +#include "duckdb/common/http_util.hpp" +#include "duckdb/web/http_wasm.h" + +#include + +namespace duckdb { +class HTTPLogger; +class FileOpener; +struct FileOpenerInfo; +class HTTPState; + +class HTTPFSClient : public HTTPClient { +public: + HTTPFSClient(HTTPFSParams &http_params, const string &proto_host_port) { + std::cout << "built HTTPFSClient with " << proto_host_port << "\n"; + host_port = proto_host_port; +/* + client = make_uniq(proto_host_port); + client->set_follow_location(true); + client->set_keep_alive(http_params.keep_alive); + if (!http_params.ca_cert_file.empty()) { + client->set_ca_cert_path(http_params.ca_cert_file.c_str()); + } + client->enable_server_certificate_verification(http_params.enable_server_cert_verification); + client->set_write_timeout(http_params.timeout, http_params.timeout_usec); + client->set_read_timeout(http_params.timeout, http_params.timeout_usec); + client->set_connection_timeout(http_params.timeout, http_params.timeout_usec); + client->set_decompress(false); + if (!http_params.bearer_token.empty()) { + client->set_bearer_token_auth(http_params.bearer_token.c_str()); + } + + if (!http_params.http_proxy.empty()) { + client->set_proxy(http_params.http_proxy, http_params.http_proxy_port); + + if (!http_params.http_proxy_username.empty()) { + client->set_proxy_basic_auth(http_params.http_proxy_username, http_params.http_proxy_password); + } + } + state = http_params.state; +*/ + } + string host_port; + + + unique_ptr Get(GetRequestInfo &info) override { + std::cout << "Get 
\n"; +// +//std::cout << info.headers << "\n"; +//std::cout << info.params << "\n"; + + for (auto h : info.headers) { + std::cout << h.first << " -- " << h.second << "..\n"; + } + std::cout << "\n"; + +unique_ptr res; + + string path = host_port + info.url; + path = info.url; + +int n = 0; +for (auto h: info.headers) { + n++; +} + +char ** z = (char**)(void*)malloc(n * 4 * 2); + +int i = 0; +for (auto h: info.headers) { + z[i] = (char*)malloc(h.first.size()*4+1); + memset(z[i], 0, h.first.size()*4+1); + memcpy(z[i], h.first.c_str(), h.first.size()); + i++; + z[i] = (char*)malloc(h.second.size()+1); + memset(z[i], 0, h.first.size()+1); + memcpy(z[i], h.second.c_str(), h.second.size()); + i++; +} + + char *exe = NULL; + exe = (char *) +EM_ASM_PTR( + { + var url = (UTF8ToString($0)); + if (typeof XMLHttpRequest === "undefined") { + return 0; + } + const xhr = new XMLHttpRequest(); + xhr.open(UTF8ToString($3), url, false); + xhr.responseType = "arraybuffer"; + + var i = 0; + var len = $1; + while (i < len) { +var ptr1 = HEAP32[($2 + (i * 4)) >> 2]; +var ptr2 = HEAP32[($2 + ((i+1) * 4)) >> 2]; + + try { + //xhr.setRequestHeader(UTF8ToString(ptr1), UTF8ToString(ptr2)); + xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); + } catch (error) { + console.log(error); +} + i+=2; + } + + try { + xhr.send(null); + console.log(xhr.response); + } catch { + return 0; + } + if (xhr.status != 200) + return 0; + var uInt8Array = xhr.response; + + var len = uInt8Array.byteLength; + var fileOnWasmHeap = _malloc(len + 4); + + var properArray = new Uint8Array(uInt8Array); + + for (var iii = 0; iii < len; iii++) { + Module.HEAPU8[iii + fileOnWasmHeap + 4] = properArray[iii]; + } + var LEN123 = new Uint8Array(4); + LEN123[0] = len % 256; + len -= LEN123[0]; + len /= 256; + LEN123[1] = len % 256; + len -= LEN123[1]; + len /= 256; + LEN123[2] = len % 256; + len -= LEN123[2]; + len /= 256; + LEN123[3] = len % 256; + len -= LEN123[3]; + len /= 256; + 
Module.HEAPU8.set(LEN123, fileOnWasmHeap); + console.log(properArray); + return fileOnWasmHeap; + }, path.c_str(), n, z, "GET"); + + +i = 0; +for (auto h: info.headers) { + free(z[i]); + i++; + free(z[i]); + i++; +} +free(z); + + if (!exe) { + res = make_uniq(HTTPStatusCode::NotFound_404); + res->reason = "Unknown error, something went quack in Wasm land! Please consult the console and or the docs at https://duckdb.org/community_extensions/extensions/webmacro"; + } else { + res = duckdb::make_uniq(HTTPStatusCode::OK_200); + uint64_t LEN = 0; + LEN *= 256; + LEN += ((uint8_t *)exe)[3]; + LEN *= 256; + LEN += ((uint8_t *)exe)[2]; + LEN *= 256; + LEN += ((uint8_t *)exe)[1]; + LEN *= 256; + LEN += ((uint8_t *)exe)[0]; + res->body = string(exe + 4, LEN); + free(exe); + } + + return res; + + +/* + if (state) { + state->get_count++; + } + auto headers = TransformHeaders(info.headers, info.params); + if (!info.response_handler && !info.content_handler) { + return TransformResult(client->Get(info.path, headers)); + } else { + return TransformResult(client->Get( + info.path.c_str(), headers, + [&](const duckdb_httplib_openssl::Response &response) { + auto http_response = TransformResponse(response); + return info.response_handler(*http_response); + }, + [&](const char *data, size_t data_length) { + if (state) { + state->total_bytes_received += data_length; + } + return info.content_handler(const_data_ptr_cast(data), data_length); + })); + } +*/ + } + unique_ptr Post(PostRequestInfo &info) override { + std::cout << "Post \n"; +// +//std::cout << info.headers << "\n"; +//std::cout << info.params << "\n"; + + + for (auto h : info.headers) { + std::cout << h.first << " -- " << h.second << "..\n"; + } + std::cout << "\n"; + +unique_ptr res; + + string path = host_port + info.url; + path = info.url; + +int n = 0; +for (auto h: info.headers) { + n++; +} + +char ** z = (char**)(void*)malloc(n * 4 * 2); + +int i = 0; +for (auto h: info.headers) { + z[i] = 
(char*)malloc(h.first.size()*4+1); + memset(z[i], 0, h.first.size()*4+1); + memcpy(z[i], h.first.c_str(), h.first.size()); + i++; + z[i] = (char*)malloc(h.second.size()*4+1); + memset(z[i], 0, h.first.size()*4+1); + memcpy(z[i], h.second.c_str(), h.second.size()); + i++; +} + + char *exe = NULL; + exe = (char *) +EM_ASM_PTR( + { + var url = (UTF8ToString($0)); + if (typeof XMLHttpRequest === "undefined") { + return 0; + } + const xhr = new XMLHttpRequest(); + xhr.open(UTF8ToString($3), url, false); + xhr.responseType = "arraybuffer"; + + var i = 0; + var len = $1; + while (i < len) { +var ptr1 = HEAP32[($2 + (i * 4)) >> 2]; +var ptr2 = HEAP32[($2 + ((i+1) * 4)) >> 2]; + + try { + xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); + } catch (error) { + console.log(error); + } + i+=2; + } + + try { + xhr.send(UTF8ToString($4)); + } catch { + return 0; + } + if (xhr.status != 200) + return 0; + var uInt8Array = xhr.response; + + var len = uInt8Array.byteLength; + var fileOnWasmHeap = _malloc(len + 4); + + var properArray = new Uint8Array(uInt8Array); + + for (var iii = 0; iii < len; iii++) { + Module.HEAPU8[iii + fileOnWasmHeap + 4] = properArray[iii]; + } + var LEN123 = new Uint8Array(4); + LEN123[0] = len % 256; + len -= LEN123[0]; + len /= 256; + LEN123[1] = len % 256; + len -= LEN123[1]; + len /= 256; + LEN123[2] = len % 256; + len -= LEN123[2]; + len /= 256; + LEN123[3] = len % 256; + len -= LEN123[3]; + len /= 256; + Module.HEAPU8.set(LEN123, fileOnWasmHeap); + console.log(properArray); + return fileOnWasmHeap; + }, path.c_str(), n, z, "POST", info.buffer_in); + + +i = 0; +for (auto h: info.headers) { + free(z[i]); + i++; + free(z[i]); + i++; +} +free(z); + + if (!exe) { + res = make_uniq(HTTPStatusCode::NotFound_404); + res->reason = "Unknown error, something went quack in Wasm land! 
Please consult the console and or the docs at https://duckdb.org/community_extensions/extensions/webmacro"; + } else { + res = duckdb::make_uniq(HTTPStatusCode::OK_200); + uint64_t LEN = 0; + LEN *= 256; + LEN += ((uint8_t *)exe)[3]; + LEN *= 256; + LEN += ((uint8_t *)exe)[2]; + LEN *= 256; + LEN += ((uint8_t *)exe)[1]; + LEN *= 256; + LEN += ((uint8_t *)exe)[0]; + res->body = string(exe + 4, LEN); + free(exe); + } + + return res; + + +/* + if (state) { + state->get_count++; + } + auto headers = TransformHeaders(info.headers, info.params); + if (!info.response_handler && !info.content_handler) { + return TransformResult(client->Get(info.path, headers)); + } else { + return TransformResult(client->Get( + info.path.c_str(), headers, + [&](const duckdb_httplib_openssl::Response &response) { + auto http_response = TransformResponse(response); + return info.response_handler(*http_response); + }, + [&](const char *data, size_t data_length) { + if (state) { + state->total_bytes_received += data_length; + } + return info.content_handler(const_data_ptr_cast(data), data_length); + })); + } +*/ + } + unique_ptr Put(PutRequestInfo &info) override { + std::cout << "Put \n"; + return nullptr; +/* + if (state) { + state->put_count++; + state->total_bytes_sent += info.buffer_in_len; + } + auto headers = TransformHeaders(info.headers, info.params); + return TransformResult(client->Put(info.path, headers, const_char_ptr_cast(info.buffer_in), info.buffer_in_len, + info.content_type)); +*/ + } + + unique_ptr Head(HeadRequestInfo &info) override { + std::cout << "Head \n"; + return nullptr; +/* + if (state) { + + state->head_count++; + } + auto headers = TransformHeaders(info.headers, info.params); + return TransformResult(client->Head(info.path, headers)); +*/ + } + + unique_ptr Delete(DeleteRequestInfo &info) override { + return nullptr; +/* + if (state) { + state->delete_count++; + } + auto headers = TransformHeaders(info.headers, info.params); + return 
TransformResult(client->Delete(info.path, headers)); +*/ + } + + +private: +/* + duckdb_httplib_openssl::Headers TransformHeaders(const HTTPHeaders &header_map, const HTTPParams ¶ms) { + duckdb_httplib_openssl::Headers headers; + for (auto &entry : header_map) { + headers.insert(entry); + } + for (auto &entry : params.extra_headers) { + headers.insert(entry); + } + return headers; + } + + unique_ptr TransformResponse(const duckdb_httplib_openssl::Response &response) { + auto status_code = HTTPUtil::ToStatusCode(response.status); + auto result = make_uniq(status_code); + result->body = response.body; + result->reason = response.reason; + for (auto &entry : response.headers) { + result->headers.Insert(entry.first, entry.second); + } + return result; + } + + unique_ptr TransformResult(duckdb_httplib_openssl::Result &&res) { + if (res.error() == duckdb_httplib_openssl::Error::Success) { + auto &response = res.value(); + return TransformResponse(response); + } else { + auto result = make_uniq(HTTPStatusCode::INVALID); + result->request_error = to_string(res.error()); + return result; + } + } +*/ +private: +// unique_ptr client; + optional_ptr state; +}; + +unique_ptr HTTPWasmUtil::InitializeClient(HTTPParams &http_params, const string &proto_host_port) { + auto client = make_uniq(http_params.Cast(), proto_host_port); + return std::move(client); +} + +string HTTPWasmUtil::GetName() const { + return "WasmHTTPUtils"; +} + +} // namespace ducdkb diff --git a/lib/src/webdb.cc b/lib/src/webdb.cc index 8d93881f0..537991447 100644 --- a/lib/src/webdb.cc +++ b/lib/src/webdb.cc @@ -1,6 +1,7 @@ #define RAPIDJSON_HAS_STDSTRING 1 #include "duckdb/web/webdb.h" +#include "duckdb/web/http_wasm.h" #include @@ -970,6 +971,11 @@ arrow::Status WebDB::Open(std::string_view args_json) { #endif // WASM_LOADABLE_EXTENSIONS RegisterCustomExtensionOptions(db); + auto& config = duckdb::DBConfig::GetConfig(*db->instance); + if (!config.http_util || config.http_util->GetName() != 
string("WasmHTTPUtils")) { + config.http_util = make_shared_ptr(); + } + // Reset state that is specific to the old database connections_.clear(); database_.reset(); From cd0d2b531dd305d8afe4db4d719156acb50f15e1 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 25 Jun 2025 10:02:19 +0200 Subject: [PATCH 2/6] Fix http_wasm and format --- lib/src/http_wasm.cc | 662 ++++++++++++++++--------------------------- 1 file changed, 246 insertions(+), 416 deletions(-) diff --git a/lib/src/http_wasm.cc b/lib/src/http_wasm.cc index 1fa12a39d..2087a8415 100644 --- a/lib/src/http_wasm.cc +++ b/lib/src/http_wasm.cc @@ -1,8 +1,11 @@ -#include "duckdb/common/http_util.hpp" #include "duckdb/web/http_wasm.h" +#include + #include +#include "duckdb/common/http_util.hpp" + namespace duckdb { class HTTPLogger; class FileOpener; @@ -10,428 +13,255 @@ struct FileOpenerInfo; class HTTPState; class HTTPFSClient : public HTTPClient { -public: - HTTPFSClient(HTTPFSParams &http_params, const string &proto_host_port) { - std::cout << "built HTTPFSClient with " << proto_host_port << "\n"; - host_port = proto_host_port; -/* - client = make_uniq(proto_host_port); - client->set_follow_location(true); - client->set_keep_alive(http_params.keep_alive); - if (!http_params.ca_cert_file.empty()) { - client->set_ca_cert_path(http_params.ca_cert_file.c_str()); - } - client->enable_server_certificate_verification(http_params.enable_server_cert_verification); - client->set_write_timeout(http_params.timeout, http_params.timeout_usec); - client->set_read_timeout(http_params.timeout, http_params.timeout_usec); - client->set_connection_timeout(http_params.timeout, http_params.timeout_usec); - client->set_decompress(false); - if (!http_params.bearer_token.empty()) { - client->set_bearer_token_auth(http_params.bearer_token.c_str()); - } - - if (!http_params.http_proxy.empty()) { - client->set_proxy(http_params.http_proxy, http_params.http_proxy_port); - - if (!http_params.http_proxy_username.empty()) { - 
client->set_proxy_basic_auth(http_params.http_proxy_username, http_params.http_proxy_password); - } - } - state = http_params.state; -*/ - } - string host_port; - - - unique_ptr Get(GetRequestInfo &info) override { - std::cout << "Get \n"; -// -//std::cout << info.headers << "\n"; -//std::cout << info.params << "\n"; - - for (auto h : info.headers) { - std::cout << h.first << " -- " << h.second << "..\n"; - } - std::cout << "\n"; - -unique_ptr res; - - string path = host_port + info.url; - path = info.url; - -int n = 0; -for (auto h: info.headers) { - n++; -} - -char ** z = (char**)(void*)malloc(n * 4 * 2); - -int i = 0; -for (auto h: info.headers) { - z[i] = (char*)malloc(h.first.size()*4+1); - memset(z[i], 0, h.first.size()*4+1); - memcpy(z[i], h.first.c_str(), h.first.size()); - i++; - z[i] = (char*)malloc(h.second.size()+1); - memset(z[i], 0, h.first.size()+1); - memcpy(z[i], h.second.c_str(), h.second.size()); - i++; -} - - char *exe = NULL; - exe = (char *) -EM_ASM_PTR( - { - var url = (UTF8ToString($0)); - if (typeof XMLHttpRequest === "undefined") { - return 0; - } - const xhr = new XMLHttpRequest(); - xhr.open(UTF8ToString($3), url, false); - xhr.responseType = "arraybuffer"; - - var i = 0; - var len = $1; - while (i < len) { -var ptr1 = HEAP32[($2 + (i * 4)) >> 2]; -var ptr2 = HEAP32[($2 + ((i+1) * 4)) >> 2]; - - try { - //xhr.setRequestHeader(UTF8ToString(ptr1), UTF8ToString(ptr2)); - xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); - } catch (error) { - console.log(error); -} - i+=2; - } - - try { - xhr.send(null); - console.log(xhr.response); - } catch { - return 0; - } - if (xhr.status != 200) - return 0; - var uInt8Array = xhr.response; - - var len = uInt8Array.byteLength; - var fileOnWasmHeap = _malloc(len + 4); - - var properArray = new Uint8Array(uInt8Array); - - for (var iii = 0; iii < len; iii++) { - Module.HEAPU8[iii + fileOnWasmHeap + 4] = properArray[iii]; - } - var LEN123 = new Uint8Array(4); - LEN123[0] = 
len % 256; - len -= LEN123[0]; - len /= 256; - LEN123[1] = len % 256; - len -= LEN123[1]; - len /= 256; - LEN123[2] = len % 256; - len -= LEN123[2]; - len /= 256; - LEN123[3] = len % 256; - len -= LEN123[3]; - len /= 256; - Module.HEAPU8.set(LEN123, fileOnWasmHeap); - console.log(properArray); - return fileOnWasmHeap; - }, path.c_str(), n, z, "GET"); - - -i = 0; -for (auto h: info.headers) { - free(z[i]); - i++; - free(z[i]); - i++; -} -free(z); - - if (!exe) { - res = make_uniq(HTTPStatusCode::NotFound_404); - res->reason = "Unknown error, something went quack in Wasm land! Please consult the console and or the docs at https://duckdb.org/community_extensions/extensions/webmacro"; - } else { - res = duckdb::make_uniq(HTTPStatusCode::OK_200); - uint64_t LEN = 0; - LEN *= 256; - LEN += ((uint8_t *)exe)[3]; - LEN *= 256; - LEN += ((uint8_t *)exe)[2]; - LEN *= 256; - LEN += ((uint8_t *)exe)[1]; - LEN *= 256; - LEN += ((uint8_t *)exe)[0]; - res->body = string(exe + 4, LEN); - free(exe); + public: + HTTPFSClient(HTTPFSParams &http_params, const string &proto_host_port) { host_port = proto_host_port; } + string host_port; + + unique_ptr Get(GetRequestInfo &info) override { + unique_ptr res; + + string path = host_port + info.url; + path = info.url; + + int n = 0; + for (auto h : info.headers) { + n++; + } + + char **z = (char **)(void *)malloc(n * 4 * 2); + + int i = 0; + for (auto h : info.headers) { + z[i] = (char *)malloc(h.first.size() * 4 + 1); + memset(z[i], 0, h.first.size() * 4 + 1); + memcpy(z[i], h.first.c_str(), h.first.size()); + i++; + z[i] = (char *)malloc(h.second.size() + 1); + memset(z[i], 0, h.first.size() + 1); + memcpy(z[i], h.second.c_str(), h.second.size()); + i++; + } + + char *exe = NULL; + exe = (char *)EM_ASM_PTR( + { + var url = (UTF8ToString($0)); + if (typeof XMLHttpRequest == = "undefined") { + return 0; + } + const xhr = new XMLHttpRequest(); + xhr.open(UTF8ToString($3), url, false); + xhr.responseType = "arraybuffer"; + + var i = 0; + var 
len = $1; + while (i < len) { + var ptr1 = HEAP32[($2 + (i * 4)) >> 2]; + var ptr2 = HEAP32[($2 + ((i + 1) * 4)) >> 2]; + + try { + xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); + } catch (error) { + console.warn("Error while performing XMLHttpRequest.setRequestHeader(), error); + } + i += 2; + } + + try { + xhr.send(null); + } catch { + return 0; + } + if (xhr.status != 200) return 0; + var uInt8Array = xhr.response; + + var len = uInt8Array.byteLength; + var fileOnWasmHeap = _malloc(len + 4); + + var properArray = new Uint8Array(uInt8Array); + + for (var iii = 0; iii < len; iii++) { + Module.HEAPU8[iii + fileOnWasmHeap + 4] = properArray[iii]; + } + var LEN123 = new Uint8Array(4); + LEN123[0] = len % 256; + len -= LEN123[0]; + len /= 256; + LEN123[1] = len % 256; + len -= LEN123[1]; + len /= 256; + LEN123[2] = len % 256; + len -= LEN123[2]; + len /= 256; + LEN123[3] = len % 256; + len -= LEN123[3]; + len /= 256; + Module.HEAPU8.set(LEN123, fileOnWasmHeap); + return fileOnWasmHeap; + }, + path.c_str(), n, z, "GET"); + + i = 0; + for (auto h : info.headers) { + free(z[i]); + i++; + free(z[i]); + i++; + } + free(z); + + if (!exe) { + res = make_uniq(HTTPStatusCode::NotFound_404); + res->reason = + "Unknown error, something went quack in Wasm land! 
Please consult the console and or the docs at " + "https://duckdb.org/community_extensions/extensions/webmacro"; + } else { + res = duckdb::make_uniq(HTTPStatusCode::OK_200); + uint64_t LEN = 0; + LEN *= 256; + LEN += ((uint8_t *)exe)[3]; + LEN *= 256; + LEN += ((uint8_t *)exe)[2]; + LEN *= 256; + LEN += ((uint8_t *)exe)[1]; + LEN *= 256; + LEN += ((uint8_t *)exe)[0]; + res->body = string(exe + 4, LEN); + free(exe); + } + + return res; } + unique_ptr Post(PostRequestInfo &info) override { + unique_ptr res; + + string path = host_port + info.url; + path = info.url; + + int n = 0; + for (auto h : info.headers) { + n++; + } + + char **z = (char **)(void *)malloc(n * 4 * 2); + + int i = 0; + for (auto h : info.headers) { + z[i] = (char *)malloc(h.first.size() * 4 + 1); + memset(z[i], 0, h.first.size() * 4 + 1); + memcpy(z[i], h.first.c_str(), h.first.size()); + i++; + z[i] = (char *)malloc(h.second.size() * 4 + 1); + memset(z[i], 0, h.first.size() * 4 + 1); + memcpy(z[i], h.second.c_str(), h.second.size()); + i++; + } + + char *exe = NULL; + exe = (char *)EM_ASM_PTR( + { + var url = (UTF8ToString($0)); + if (typeof XMLHttpRequest == = "undefined") { + return 0; + } + const xhr = new XMLHttpRequest(); + xhr.open(UTF8ToString($3), url, false); + xhr.responseType = "arraybuffer"; + + var i = 0; + var len = $1; + while (i < len) { + var ptr1 = HEAP32[($2 + (i * 4)) >> 2]; + var ptr2 = HEAP32[($2 + ((i + 1) * 4)) >> 2]; + + try { + xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); + } catch (error) { + console.warn("Error while performing XMLHttpRequest.setRequestHeader(), error); + } + i += 2; + } + + try { + xhr.send(UTF8ToString($4)); + } catch { + return 0; + } + if (xhr.status != 200) return 0; + var uInt8Array = xhr.response; + + var len = uInt8Array.byteLength; + var fileOnWasmHeap = _malloc(len + 4); + + var properArray = new Uint8Array(uInt8Array); + + for (var iii = 0; iii < len; iii++) { + Module.HEAPU8[iii + fileOnWasmHeap + 4] 
= properArray[iii]; + } + var LEN123 = new Uint8Array(4); + LEN123[0] = len % 256; + len -= LEN123[0]; + len /= 256; + LEN123[1] = len % 256; + len -= LEN123[1]; + len /= 256; + LEN123[2] = len % 256; + len -= LEN123[2]; + len /= 256; + LEN123[3] = len % 256; + len -= LEN123[3]; + len /= 256; + Module.HEAPU8.set(LEN123, fileOnWasmHeap); + return fileOnWasmHeap; + }, + path.c_str(), n, z, "POST", info.buffer_in); + + i = 0; + for (auto h : info.headers) { + free(z[i]); + i++; + free(z[i]); + i++; + } + free(z); + + if (!exe) { + res = make_uniq(HTTPStatusCode::NotFound_404); + res->reason = + "Unknown error, something went quack in Wasm land! Please consult the console and or the docs at " + "https://duckdb.org/community_extensions/extensions/webmacro"; + } else { + res = duckdb::make_uniq(HTTPStatusCode::OK_200); + uint64_t LEN = 0; + LEN *= 256; + LEN += ((uint8_t *)exe)[3]; + LEN *= 256; + LEN += ((uint8_t *)exe)[2]; + LEN *= 256; + LEN += ((uint8_t *)exe)[1]; + LEN *= 256; + LEN += ((uint8_t *)exe)[0]; + res->body = string(exe + 4, LEN); + free(exe); + } + + return res; + } + unique_ptr Put(PutRequestInfo &info) override { return nullptr; } - return res; - - -/* - if (state) { - state->get_count++; - } - auto headers = TransformHeaders(info.headers, info.params); - if (!info.response_handler && !info.content_handler) { - return TransformResult(client->Get(info.path, headers)); - } else { - return TransformResult(client->Get( - info.path.c_str(), headers, - [&](const duckdb_httplib_openssl::Response &response) { - auto http_response = TransformResponse(response); - return info.response_handler(*http_response); - }, - [&](const char *data, size_t data_length) { - if (state) { - state->total_bytes_received += data_length; - } - return info.content_handler(const_data_ptr_cast(data), data_length); - })); - } -*/ - } - unique_ptr Post(PostRequestInfo &info) override { - std::cout << "Post \n"; -// -//std::cout << info.headers << "\n"; -//std::cout << info.params << 
"\n"; - - - for (auto h : info.headers) { - std::cout << h.first << " -- " << h.second << "..\n"; - } - std::cout << "\n"; - -unique_ptr res; - - string path = host_port + info.url; - path = info.url; - -int n = 0; -for (auto h: info.headers) { - n++; -} - -char ** z = (char**)(void*)malloc(n * 4 * 2); - -int i = 0; -for (auto h: info.headers) { - z[i] = (char*)malloc(h.first.size()*4+1); - memset(z[i], 0, h.first.size()*4+1); - memcpy(z[i], h.first.c_str(), h.first.size()); - i++; - z[i] = (char*)malloc(h.second.size()*4+1); - memset(z[i], 0, h.first.size()*4+1); - memcpy(z[i], h.second.c_str(), h.second.size()); - i++; -} + unique_ptr Head(HeadRequestInfo &info) override { return nullptr; } - char *exe = NULL; - exe = (char *) -EM_ASM_PTR( - { - var url = (UTF8ToString($0)); - if (typeof XMLHttpRequest === "undefined") { - return 0; - } - const xhr = new XMLHttpRequest(); - xhr.open(UTF8ToString($3), url, false); - xhr.responseType = "arraybuffer"; - - var i = 0; - var len = $1; - while (i < len) { -var ptr1 = HEAP32[($2 + (i * 4)) >> 2]; -var ptr2 = HEAP32[($2 + ((i+1) * 4)) >> 2]; - - try { - xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); - } catch (error) { - console.log(error); - } - i+=2; - } - - try { - xhr.send(UTF8ToString($4)); - } catch { - return 0; - } - if (xhr.status != 200) - return 0; - var uInt8Array = xhr.response; - - var len = uInt8Array.byteLength; - var fileOnWasmHeap = _malloc(len + 4); - - var properArray = new Uint8Array(uInt8Array); - - for (var iii = 0; iii < len; iii++) { - Module.HEAPU8[iii + fileOnWasmHeap + 4] = properArray[iii]; - } - var LEN123 = new Uint8Array(4); - LEN123[0] = len % 256; - len -= LEN123[0]; - len /= 256; - LEN123[1] = len % 256; - len -= LEN123[1]; - len /= 256; - LEN123[2] = len % 256; - len -= LEN123[2]; - len /= 256; - LEN123[3] = len % 256; - len -= LEN123[3]; - len /= 256; - Module.HEAPU8.set(LEN123, fileOnWasmHeap); - console.log(properArray); - return fileOnWasmHeap; - 
}, path.c_str(), n, z, "POST", info.buffer_in); - - -i = 0; -for (auto h: info.headers) { - free(z[i]); - i++; - free(z[i]); - i++; -} -free(z); - - if (!exe) { - res = make_uniq(HTTPStatusCode::NotFound_404); - res->reason = "Unknown error, something went quack in Wasm land! Please consult the console and or the docs at https://duckdb.org/community_extensions/extensions/webmacro"; - } else { - res = duckdb::make_uniq(HTTPStatusCode::OK_200); - uint64_t LEN = 0; - LEN *= 256; - LEN += ((uint8_t *)exe)[3]; - LEN *= 256; - LEN += ((uint8_t *)exe)[2]; - LEN *= 256; - LEN += ((uint8_t *)exe)[1]; - LEN *= 256; - LEN += ((uint8_t *)exe)[0]; - res->body = string(exe + 4, LEN); - free(exe); - } + unique_ptr Delete(DeleteRequestInfo &info) override { return nullptr; } - return res; - - -/* - if (state) { - state->get_count++; - } - auto headers = TransformHeaders(info.headers, info.params); - if (!info.response_handler && !info.content_handler) { - return TransformResult(client->Get(info.path, headers)); - } else { - return TransformResult(client->Get( - info.path.c_str(), headers, - [&](const duckdb_httplib_openssl::Response &response) { - auto http_response = TransformResponse(response); - return info.response_handler(*http_response); - }, - [&](const char *data, size_t data_length) { - if (state) { - state->total_bytes_received += data_length; - } - return info.content_handler(const_data_ptr_cast(data), data_length); - })); - } -*/ - } - unique_ptr Put(PutRequestInfo &info) override { - std::cout << "Put \n"; - return nullptr; -/* - if (state) { - state->put_count++; - state->total_bytes_sent += info.buffer_in_len; - } - auto headers = TransformHeaders(info.headers, info.params); - return TransformResult(client->Put(info.path, headers, const_char_ptr_cast(info.buffer_in), info.buffer_in_len, - info.content_type)); -*/ - } - - unique_ptr Head(HeadRequestInfo &info) override { - std::cout << "Head \n"; - return nullptr; -/* - if (state) { - - state->head_count++; - } - 
auto headers = TransformHeaders(info.headers, info.params); - return TransformResult(client->Head(info.path, headers)); -*/ - } - - unique_ptr Delete(DeleteRequestInfo &info) override { - return nullptr; -/* - if (state) { - state->delete_count++; - } - auto headers = TransformHeaders(info.headers, info.params); - return TransformResult(client->Delete(info.path, headers)); -*/ - } - - -private: -/* - duckdb_httplib_openssl::Headers TransformHeaders(const HTTPHeaders &header_map, const HTTPParams ¶ms) { - duckdb_httplib_openssl::Headers headers; - for (auto &entry : header_map) { - headers.insert(entry); - } - for (auto &entry : params.extra_headers) { - headers.insert(entry); - } - return headers; - } - - unique_ptr TransformResponse(const duckdb_httplib_openssl::Response &response) { - auto status_code = HTTPUtil::ToStatusCode(response.status); - auto result = make_uniq(status_code); - result->body = response.body; - result->reason = response.reason; - for (auto &entry : response.headers) { - result->headers.Insert(entry.first, entry.second); - } - return result; - } - - unique_ptr TransformResult(duckdb_httplib_openssl::Result &&res) { - if (res.error() == duckdb_httplib_openssl::Error::Success) { - auto &response = res.value(); - return TransformResponse(response); - } else { - auto result = make_uniq(HTTPStatusCode::INVALID); - result->request_error = to_string(res.error()); - return result; - } - } -*/ -private: -// unique_ptr client; - optional_ptr state; + private: + optional_ptr state; }; unique_ptr HTTPWasmUtil::InitializeClient(HTTPParams &http_params, const string &proto_host_port) { - auto client = make_uniq(http_params.Cast(), proto_host_port); - return std::move(client); + auto client = make_uniq(http_params.Cast(), proto_host_port); + return std::move(client); } -string HTTPWasmUtil::GetName() const { - return "WasmHTTPUtils"; -} +string HTTPWasmUtil::GetName() const { return "WasmHTTPUtils"; } -} // namespace ducdkb +} // namespace duckdb From 
1c0b20e710dac25e9529f47334a243ba2d36e7c1 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 25 Jun 2025 10:13:51 +0200 Subject: [PATCH 3/6] Format lib/include/duckdb/web/http_wasm.h --- lib/include/duckdb/web/http_wasm.h | 49 ++++++++++++++---------------- lib/src/http_wasm.cc | 8 ++--- lib/src/webdb.cc | 2 +- 3 files changed, 27 insertions(+), 32 deletions(-) diff --git a/lib/include/duckdb/web/http_wasm.h b/lib/include/duckdb/web/http_wasm.h index a9dea3f2e..a3bfc5cc8 100644 --- a/lib/include/duckdb/web/http_wasm.h +++ b/lib/include/duckdb/web/http_wasm.h @@ -8,37 +8,32 @@ struct FileOpenerInfo; class HTTPState; struct HTTPFSParams : public HTTPParams { - HTTPFSParams(HTTPUtil &http_util) : HTTPParams(http_util) { - } - - static constexpr bool DEFAULT_ENABLE_SERVER_CERT_VERIFICATION = false; - static constexpr uint64_t DEFAULT_HF_MAX_PER_PAGE = 0; - static constexpr bool DEFAULT_FORCE_DOWNLOAD = false; - - bool force_download = DEFAULT_FORCE_DOWNLOAD; - bool enable_server_cert_verification = DEFAULT_ENABLE_SERVER_CERT_VERIFICATION; - idx_t hf_max_per_page = DEFAULT_HF_MAX_PER_PAGE; - string ca_cert_file; - string bearer_token; - shared_ptr state; + HTTPFSParams(HTTPUtil &http_util) : HTTPParams(http_util) {} + + static constexpr bool DEFAULT_ENABLE_SERVER_CERT_VERIFICATION = false; + static constexpr uint64_t DEFAULT_HF_MAX_PER_PAGE = 0; + static constexpr bool DEFAULT_FORCE_DOWNLOAD = false; + + bool force_download = DEFAULT_FORCE_DOWNLOAD; + bool enable_server_cert_verification = DEFAULT_ENABLE_SERVER_CERT_VERIFICATION; + idx_t hf_max_per_page = DEFAULT_HF_MAX_PER_PAGE; + string ca_cert_file; + string bearer_token; + shared_ptr state; }; class HTTPWasmUtil : public HTTPUtil { -public: - unique_ptr InitializeParameters(optional_ptr opener, - optional_ptr info) override { - std::cout << "InitializeParameters\n"; - return make_uniq(*this); - } - unique_ptr InitializeClient(HTTPParams &http_params, const string &proto_host_port) override; + public: + 
unique_ptr InitializeParameters(optional_ptr opener, + optional_ptr info) override { + std::cout << "InitializeParameters\n"; + return make_uniq(*this); + } + unique_ptr InitializeClient(HTTPParams &http_params, const string &proto_host_port) override; - //static unordered_map ParseGetParameters(const string &text); + // static unordered_map ParseGetParameters(const string &text); - string GetName() const override; + string GetName() const override; }; - - - -} // namespace duckdb - +} // namespace duckdb diff --git a/lib/src/http_wasm.cc b/lib/src/http_wasm.cc index 2087a8415..cb433a132 100644 --- a/lib/src/http_wasm.cc +++ b/lib/src/http_wasm.cc @@ -46,7 +46,7 @@ class HTTPFSClient : public HTTPClient { exe = (char *)EM_ASM_PTR( { var url = (UTF8ToString($0)); - if (typeof XMLHttpRequest == = "undefined") { + if (typeof XMLHttpRequest === "undefined") { return 0; } const xhr = new XMLHttpRequest(); @@ -62,7 +62,7 @@ class HTTPFSClient : public HTTPClient { try { xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); } catch (error) { - console.warn("Error while performing XMLHttpRequest.setRequestHeader(), error); + console.warn("Error while performing XMLHttpRequest.setRequestHeader()", error); } i += 2; } @@ -161,7 +161,7 @@ class HTTPFSClient : public HTTPClient { exe = (char *)EM_ASM_PTR( { var url = (UTF8ToString($0)); - if (typeof XMLHttpRequest == = "undefined") { + if (typeof XMLHttpRequest === "undefined") { return 0; } const xhr = new XMLHttpRequest(); @@ -177,7 +177,7 @@ class HTTPFSClient : public HTTPClient { try { xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); } catch (error) { - console.warn("Error while performing XMLHttpRequest.setRequestHeader(), error); + console.warn("Error while performing XMLHttpRequest.setRequestHeader()", error); } i += 2; } diff --git a/lib/src/webdb.cc b/lib/src/webdb.cc index 537991447..8e6d17e06 100644 --- a/lib/src/webdb.cc +++ b/lib/src/webdb.cc @@ 
-1,7 +1,6 @@ #define RAPIDJSON_HAS_STDSTRING 1 #include "duckdb/web/webdb.h" -#include "duckdb/web/http_wasm.h" #include @@ -53,6 +52,7 @@ #include "duckdb/web/extensions/json_extension.h" #include "duckdb/web/extensions/parquet_extension.h" #include "duckdb/web/functions/table_function_relation.h" +#include "duckdb/web/http_wasm.h" #include "duckdb/web/io/arrow_ifstream.h" #include "duckdb/web/io/buffered_filesystem.h" #include "duckdb/web/io/file_page_buffer.h" From bf1411202a1e1fe2b51cb3b90a8e699a5107edbf Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 25 Jun 2025 10:22:27 +0200 Subject: [PATCH 4/6] clang-format off/on --- lib/src/http_wasm.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/src/http_wasm.cc b/lib/src/http_wasm.cc index cb433a132..9d0bbae9d 100644 --- a/lib/src/http_wasm.cc +++ b/lib/src/http_wasm.cc @@ -36,12 +36,13 @@ class HTTPFSClient : public HTTPClient { memset(z[i], 0, h.first.size() * 4 + 1); memcpy(z[i], h.first.c_str(), h.first.size()); i++; - z[i] = (char *)malloc(h.second.size() + 1); - memset(z[i], 0, h.first.size() + 1); + z[i] = (char *)malloc(h.second.size()*4 + 1); + memset(z[i], 0, h.first.size() *4+ 1); memcpy(z[i], h.second.c_str(), h.second.size()); i++; } + // clang-format off char *exe = NULL; exe = (char *)EM_ASM_PTR( { @@ -100,6 +101,7 @@ class HTTPFSClient : public HTTPClient { return fileOnWasmHeap; }, path.c_str(), n, z, "GET"); + // clang-format on i = 0; for (auto h : info.headers) { @@ -157,6 +159,7 @@ class HTTPFSClient : public HTTPClient { i++; } + // clang-format off char *exe = NULL; exe = (char *)EM_ASM_PTR( { @@ -244,6 +247,7 @@ class HTTPFSClient : public HTTPClient { res->body = string(exe + 4, LEN); free(exe); } + // clang-format on return res; } From 0318572c3656a6c48ffa072a646c52e55c97918f Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 25 Jun 2025 10:32:11 +0200 Subject: [PATCH 5/6] Remove printf debugging line --- lib/include/duckdb/web/http_wasm.h | 1 - 
1 file changed, 1 deletion(-) diff --git a/lib/include/duckdb/web/http_wasm.h b/lib/include/duckdb/web/http_wasm.h index a3bfc5cc8..af92f498c 100644 --- a/lib/include/duckdb/web/http_wasm.h +++ b/lib/include/duckdb/web/http_wasm.h @@ -26,7 +26,6 @@ class HTTPWasmUtil : public HTTPUtil { public: unique_ptr InitializeParameters(optional_ptr opener, optional_ptr info) override { - std::cout << "InitializeParameters\n"; return make_uniq(*this); } unique_ptr InitializeClient(HTTPParams &http_params, const string &proto_host_port) override; From 75e254c036d4cd511abe9ae93f3efa493af372cc Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 25 Jun 2025 10:35:11 +0200 Subject: [PATCH 6/6] more format --- lib/src/http_wasm.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/src/http_wasm.cc b/lib/src/http_wasm.cc index 9d0bbae9d..8dedd7881 100644 --- a/lib/src/http_wasm.cc +++ b/lib/src/http_wasm.cc @@ -36,13 +36,13 @@ class HTTPFSClient : public HTTPClient { memset(z[i], 0, h.first.size() * 4 + 1); memcpy(z[i], h.first.c_str(), h.first.size()); i++; - z[i] = (char *)malloc(h.second.size()*4 + 1); - memset(z[i], 0, h.first.size() *4+ 1); + z[i] = (char *)malloc(h.second.size() * 4 + 1); + memset(z[i], 0, h.first.size() * 4 + 1); memcpy(z[i], h.second.c_str(), h.second.size()); i++; } - // clang-format off + // clang-format off char *exe = NULL; exe = (char *)EM_ASM_PTR( { @@ -101,7 +101,7 @@ class HTTPFSClient : public HTTPClient { return fileOnWasmHeap; }, path.c_str(), n, z, "GET"); - // clang-format on + // clang-format on i = 0; for (auto h : info.headers) { @@ -115,8 +115,8 @@ class HTTPFSClient : public HTTPClient { if (!exe) { res = make_uniq(HTTPStatusCode::NotFound_404); res->reason = - "Unknown error, something went quack in Wasm land! Please consult the console and or the docs at " - "https://duckdb.org/community_extensions/extensions/webmacro"; + "Unknown error, something went wrong in Wasm land! 
Please consult the console and consider reporting a " + "bug"; } else { res = duckdb::make_uniq(HTTPStatusCode::OK_200); uint64_t LEN = 0; @@ -159,7 +159,7 @@ class HTTPFSClient : public HTTPClient { i++; } - // clang-format off + // clang-format off char *exe = NULL; exe = (char *)EM_ASM_PTR( { @@ -247,7 +247,7 @@ class HTTPFSClient : public HTTPClient { res->body = string(exe + 4, LEN); free(exe); } - // clang-format on + // clang-format on return res; }