diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index ac20b25d8..edbce727c 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -217,6 +217,7 @@ add_library( ${CMAKE_SOURCE_DIR}/src/arrow_casts.cc ${CMAKE_SOURCE_DIR}/src/arrow_insert_options.cc ${CMAKE_SOURCE_DIR}/src/arrow_stream_buffer.cc + ${CMAKE_SOURCE_DIR}/src/http_wasm.cc ${CMAKE_SOURCE_DIR}/src/arrow_type_mapping.cc ${CMAKE_SOURCE_DIR}/src/config.cc ${CMAKE_SOURCE_DIR}/src/csv_insert_options.cc diff --git a/lib/include/duckdb/web/http_wasm.h b/lib/include/duckdb/web/http_wasm.h new file mode 100644 index 000000000..af92f498c --- /dev/null +++ b/lib/include/duckdb/web/http_wasm.h @@ -0,0 +1,38 @@ +#include "duckdb/common/http_util.hpp" + +namespace duckdb { + +class HTTPLogger; +class FileOpener; +struct FileOpenerInfo; +class HTTPState; + +struct HTTPFSParams : public HTTPParams { + HTTPFSParams(HTTPUtil &http_util) : HTTPParams(http_util) {} + + static constexpr bool DEFAULT_ENABLE_SERVER_CERT_VERIFICATION = false; + static constexpr uint64_t DEFAULT_HF_MAX_PER_PAGE = 0; + static constexpr bool DEFAULT_FORCE_DOWNLOAD = false; + + bool force_download = DEFAULT_FORCE_DOWNLOAD; + bool enable_server_cert_verification = DEFAULT_ENABLE_SERVER_CERT_VERIFICATION; + idx_t hf_max_per_page = DEFAULT_HF_MAX_PER_PAGE; + string ca_cert_file; + string bearer_token; + shared_ptr state; +}; + +class HTTPWasmUtil : public HTTPUtil { + public: + unique_ptr InitializeParameters(optional_ptr opener, + optional_ptr info) override { + return make_uniq(*this); + } + unique_ptr InitializeClient(HTTPParams &http_params, const string &proto_host_port) override; + + // static unordered_map ParseGetParameters(const string &text); + + string GetName() const override; +}; + +} // namespace duckdb diff --git a/lib/src/http_wasm.cc b/lib/src/http_wasm.cc new file mode 100644 index 000000000..8dedd7881 --- /dev/null +++ b/lib/src/http_wasm.cc @@ -0,0 +1,271 @@ +#include "duckdb/web/http_wasm.h" + +#include + +#include + +#include "duckdb/common/http_util.hpp" + +namespace duckdb { +class HTTPLogger; +class FileOpener; +struct FileOpenerInfo; +class HTTPState; + +class HTTPFSClient : public HTTPClient { + public: + HTTPFSClient(HTTPFSParams &http_params, const string &proto_host_port) { host_port = proto_host_port; } + string host_port; + + unique_ptr Get(GetRequestInfo &info) override { + unique_ptr res; + + string path = host_port + info.url; + path = info.url; + + int n = 0; + for (auto h : info.headers) { + n++; + } + + char **z = (char **)(void *)malloc(n * 4 * 2); + + int i = 0; + for (auto h : info.headers) { + z[i] = (char *)malloc(h.first.size() * 4 + 1); + memset(z[i], 0, h.first.size() * 4 + 1); + memcpy(z[i], h.first.c_str(), h.first.size()); + i++; + z[i] = (char *)malloc(h.second.size() * 4 + 1); + memset(z[i], 0, h.first.size() * 4 + 1); + memcpy(z[i], h.second.c_str(), h.second.size()); + i++; + } + + // clang-format off + char *exe = NULL; + exe = (char *)EM_ASM_PTR( + { + var url = (UTF8ToString($0)); + if (typeof XMLHttpRequest === "undefined") { + return 0; + } + const xhr = new XMLHttpRequest(); + xhr.open(UTF8ToString($3), url, false); + xhr.responseType = "arraybuffer"; + + var i = 0; + var len = $1; + while (i < len) { + var ptr1 = HEAP32[($2 + (i * 4)) >> 2]; + var ptr2 = HEAP32[($2 + ((i + 1) * 4)) >> 2]; + + try { + xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); + } catch (error) { + console.warn("Error while performing XMLHttpRequest.setRequestHeader()", error); + } + i += 2; + } + + try { + xhr.send(null); + } catch { + return 0; + } + if (xhr.status != 200) return 0; + var uInt8Array = xhr.response; + + var len = uInt8Array.byteLength; + var fileOnWasmHeap = _malloc(len + 4); + + var properArray = new Uint8Array(uInt8Array); + + for (var iii = 0; iii < len; iii++) { + Module.HEAPU8[iii + fileOnWasmHeap + 4] = properArray[iii]; + } + var LEN123 = new Uint8Array(4); + LEN123[0] = len % 256; + len -= LEN123[0]; + len /= 256; + LEN123[1] = len % 256; + len -= LEN123[1]; + len /= 256; + LEN123[2] = len % 256; + len -= LEN123[2]; + len /= 256; + LEN123[3] = len % 256; + len -= LEN123[3]; + len /= 256; + Module.HEAPU8.set(LEN123, fileOnWasmHeap); + return fileOnWasmHeap; + }, + path.c_str(), n, z, "GET"); + // clang-format on + + i = 0; + for (auto h : info.headers) { + free(z[i]); + i++; + free(z[i]); + i++; + } + free(z); + + if (!exe) { + res = make_uniq(HTTPStatusCode::NotFound_404); + res->reason = + "Unknown error, something went wrong in Wasm land! Please consult the console and consider reporting a " + "bug"; + } else { + res = duckdb::make_uniq(HTTPStatusCode::OK_200); + uint64_t LEN = 0; + LEN *= 256; + LEN += ((uint8_t *)exe)[3]; + LEN *= 256; + LEN += ((uint8_t *)exe)[2]; + LEN *= 256; + LEN += ((uint8_t *)exe)[1]; + LEN *= 256; + LEN += ((uint8_t *)exe)[0]; + res->body = string(exe + 4, LEN); + free(exe); + } + + return res; + } + unique_ptr Post(PostRequestInfo &info) override { + unique_ptr res; + + string path = host_port + info.url; + path = info.url; + + int n = 0; + for (auto h : info.headers) { + n++; + } + + char **z = (char **)(void *)malloc(n * 4 * 2); + + int i = 0; + for (auto h : info.headers) { + z[i] = (char *)malloc(h.first.size() * 4 + 1); + memset(z[i], 0, h.first.size() * 4 + 1); + memcpy(z[i], h.first.c_str(), h.first.size()); + i++; + z[i] = (char *)malloc(h.second.size() * 4 + 1); + memset(z[i], 0, h.first.size() * 4 + 1); + memcpy(z[i], h.second.c_str(), h.second.size()); + i++; + } + + // clang-format off + char *exe = NULL; + exe = (char *)EM_ASM_PTR( + { + var url = (UTF8ToString($0)); + if (typeof XMLHttpRequest === "undefined") { + return 0; + } + const xhr = new XMLHttpRequest(); + xhr.open(UTF8ToString($3), url, false); + xhr.responseType = "arraybuffer"; + + var i = 0; + var len = $1; + while (i < len) { + var ptr1 = HEAP32[($2 + (i * 4)) >> 2]; + var ptr2 = HEAP32[($2 + ((i + 1) * 4)) >> 2]; + + try { + xhr.setRequestHeader(encodeURI(UTF8ToString(ptr1)), encodeURI(UTF8ToString(ptr2))); + } catch (error) { + console.warn("Error while performing XMLHttpRequest.setRequestHeader()", error); + } + i += 2; + } + + try { + xhr.send(UTF8ToString($4)); + } catch { + return 0; + } + if (xhr.status != 200) return 0; + var uInt8Array = xhr.response; + + var len = uInt8Array.byteLength; + var fileOnWasmHeap = _malloc(len + 4); + + var properArray = new Uint8Array(uInt8Array); + + for (var iii = 0; iii < len; iii++) { + Module.HEAPU8[iii + fileOnWasmHeap + 4] = properArray[iii]; + } + var LEN123 = new Uint8Array(4); + LEN123[0] = len % 256; + len -= LEN123[0]; + len /= 256; + LEN123[1] = len % 256; + len -= LEN123[1]; + len /= 256; + LEN123[2] = len % 256; + len -= LEN123[2]; + len /= 256; + LEN123[3] = len % 256; + len -= LEN123[3]; + len /= 256; + Module.HEAPU8.set(LEN123, fileOnWasmHeap); + return fileOnWasmHeap; + }, + path.c_str(), n, z, "POST", info.buffer_in); + + i = 0; + for (auto h : info.headers) { + free(z[i]); + i++; + free(z[i]); + i++; + } + free(z); + + if (!exe) { + res = make_uniq(HTTPStatusCode::NotFound_404); + res->reason = + "Unknown error, something went quack in Wasm land! Please consult the console and or the docs at " + "https://duckdb.org/community_extensions/extensions/webmacro"; + } else { + res = duckdb::make_uniq(HTTPStatusCode::OK_200); + uint64_t LEN = 0; + LEN *= 256; + LEN += ((uint8_t *)exe)[3]; + LEN *= 256; + LEN += ((uint8_t *)exe)[2]; + LEN *= 256; + LEN += ((uint8_t *)exe)[1]; + LEN *= 256; + LEN += ((uint8_t *)exe)[0]; + res->body = string(exe + 4, LEN); + free(exe); + } + // clang-format on + + return res; + } + unique_ptr Put(PutRequestInfo &info) override { return nullptr; } + + unique_ptr Head(HeadRequestInfo &info) override { return nullptr; } + + unique_ptr Delete(DeleteRequestInfo &info) override { return nullptr; } + + private: + optional_ptr state; +}; + +unique_ptr HTTPWasmUtil::InitializeClient(HTTPParams &http_params, const string &proto_host_port) { + auto client = make_uniq(http_params.Cast(), proto_host_port); + return std::move(client); +} + +string HTTPWasmUtil::GetName() const { return "WasmHTTPUtils"; } + +} // namespace duckdb diff --git a/lib/src/webdb.cc b/lib/src/webdb.cc index 8d93881f0..8e6d17e06 100644 --- a/lib/src/webdb.cc +++ b/lib/src/webdb.cc @@ -52,6 +52,7 @@ #include "duckdb/web/extensions/json_extension.h" #include "duckdb/web/extensions/parquet_extension.h" #include "duckdb/web/functions/table_function_relation.h" +#include "duckdb/web/http_wasm.h" #include "duckdb/web/io/arrow_ifstream.h" #include "duckdb/web/io/buffered_filesystem.h" #include "duckdb/web/io/file_page_buffer.h" @@ -970,6 +971,11 @@ arrow::Status WebDB::Open(std::string_view args_json) { #endif // WASM_LOADABLE_EXTENSIONS RegisterCustomExtensionOptions(db); + auto& config = duckdb::DBConfig::GetConfig(*db->instance); + if (!config.http_util || config.http_util->GetName() != string("WasmHTTPUtils")) { + config.http_util = make_shared_ptr(); + } + // Reset state that is specific to the old database connections_.clear(); database_.reset();