Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ endif ()
set(DUCKDB_PLATFORM "wasm_mvp")

if(EMSCRIPTEN)
# Bump stack size from default 64KB to 1MB
set(WASM_LINK_FLAGS "${WASM_LINK_FLAGS} -sSTACK_SIZE=1048576")
# Release build
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG")
Expand Down
6 changes: 6 additions & 0 deletions lib/include/duckdb/web/webdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ class WebDB {
/// The connection
duckdb::Connection connection_;

/// The statements extracted from the text passed to PendingQuery
std::vector<duckdb::unique_ptr<duckdb::SQLStatement>> current_pending_statements_;
/// The index of the currently-running statement (in the above list)
size_t current_pending_statement_index_ = 0;
/// The value of allow_stream_result passed to PendingQuery
bool current_allow_stream_result_ = false;
/// The current pending query result (if any)
duckdb::unique_ptr<duckdb::PendingQueryResult> current_pending_query_result_ = nullptr;
/// The current pending query was canceled
Expand Down
52 changes: 44 additions & 8 deletions lib/src/webdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ arrow::Result<std::shared_ptr<arrow::Buffer>> WebDB::Connection::MaterializeQuer

// Configure the output writer
ArrowSchema raw_schema;
ClientProperties options("UTC", ArrowOffsetSize::REGULAR, false, false, false, connection_.context);
bool lossless_conversion = webdb_.config_->arrow_lossless_conversion;
ClientProperties options("UTC", ArrowOffsetSize::REGULAR, false, false, lossless_conversion,
ArrowFormatVersion::V1_0, connection_.context);
unordered_map<idx_t, const shared_ptr<ArrowTypeExtensionData>> extension_type_cast;
options.arrow_offset_size = ArrowOffsetSize::REGULAR;
ArrowConverter::ToArrowSchema(&raw_schema, result->types, result->names, options);
Expand Down Expand Up @@ -142,7 +144,9 @@ arrow::Result<std::shared_ptr<arrow::Buffer>> WebDB::Connection::StreamQueryResu

// Import the schema
ArrowSchema raw_schema;
ClientProperties options("UTC", ArrowOffsetSize::REGULAR, false, false, false, connection_.context);
bool lossless_conversion = webdb_.config_->arrow_lossless_conversion;
ClientProperties options("UTC", ArrowOffsetSize::REGULAR, false, false, lossless_conversion,
ArrowFormatVersion::V1_0, connection_.context);
options.arrow_offset_size = ArrowOffsetSize::REGULAR;
ArrowConverter::ToArrowSchema(&raw_schema, current_query_result_->types, current_query_result_->names, options);
ARROW_ASSIGN_OR_RAISE(current_schema_, arrow::ImportSchema(&raw_schema));
Expand Down Expand Up @@ -170,9 +174,20 @@ arrow::Result<std::shared_ptr<arrow::Buffer>> WebDB::Connection::RunQuery(std::s
arrow::Result<std::shared_ptr<arrow::Buffer>> WebDB::Connection::PendingQuery(std::string_view text,
bool allow_stream_result) {
try {
// Send the query
auto result = connection_.PendingQuery(std::string{text}, allow_stream_result);
if (result->HasError()) return arrow::Status{arrow::StatusCode::ExecutionError, std::move(result->GetError())};
auto statements = connection_.ExtractStatements(std::string{text});
if (statements.size() == 0) {
return arrow::Status{arrow::StatusCode::ExecutionError, "no statements"};
}
current_pending_statements_ = std::move(statements);
current_pending_statement_index_ = 0;
current_allow_stream_result_ = allow_stream_result;
// Send the first query
auto result = connection_.PendingQuery(std::move(current_pending_statements_[current_pending_statement_index_]),
current_allow_stream_result_);
if (result->HasError()) {
current_pending_statements_.clear();
return arrow::Status{arrow::StatusCode::ExecutionError, std::move(result->GetError())};
}
current_pending_query_result_ = std::move(result);
current_pending_query_was_canceled_ = false;
current_query_result_.reset();
Expand Down Expand Up @@ -202,8 +217,25 @@ arrow::Result<std::shared_ptr<arrow::Buffer>> WebDB::Connection::PollPendingQuer
do {
switch (current_pending_query_result_->ExecuteTask()) {
case PendingExecutionResult::EXECUTION_FINISHED:
case PendingExecutionResult::RESULT_READY:
return StreamQueryResult(current_pending_query_result_->Execute());
case PendingExecutionResult::RESULT_READY: {
auto result = current_pending_query_result_->Execute();
current_pending_statement_index_++;
// If this was the last statement, then return the result
if (current_pending_statement_index_ == current_pending_statements_.size()) {
return StreamQueryResult(std::move(result));
}
// Otherwise, start the next statement
auto pending_result =
connection_.PendingQuery(std::move(current_pending_statements_[current_pending_statement_index_]),
current_allow_stream_result_);
if (pending_result->HasError()) {
current_pending_query_result_.reset();
current_pending_statements_.clear();
return arrow::Status{arrow::StatusCode::ExecutionError, std::move(pending_result->GetError())};
}
current_pending_query_result_ = std::move(pending_result);
break;
}
case PendingExecutionResult::BLOCKED:
case PendingExecutionResult::NO_TASKS_AVAILABLE:
return nullptr;
Expand All @@ -212,6 +244,7 @@ arrow::Result<std::shared_ptr<arrow::Buffer>> WebDB::Connection::PollPendingQuer
case PendingExecutionResult::EXECUTION_ERROR: {
auto err = current_pending_query_result_->GetError();
current_pending_query_result_.reset();
current_pending_statements_.clear();
return arrow::Status{arrow::StatusCode::ExecutionError, err};
}
}
Expand All @@ -226,6 +259,7 @@ bool WebDB::Connection::CancelPendingQuery() {
if (current_pending_query_result_ != nullptr && current_query_result_ == nullptr) {
current_pending_query_was_canceled_ = true;
current_pending_query_result_.reset();
current_pending_statements_.clear();
return true;
} else {
return false;
Expand Down Expand Up @@ -294,7 +328,9 @@ DuckDBWasmResultsWrapper WebDB::Connection::FetchQueryResults() {

// Serialize the record batch
ArrowArray array;
ClientProperties arrow_options("UTC", ArrowOffsetSize::REGULAR, false, false, false, connection_.context);
bool lossless_conversion = webdb_.config_->arrow_lossless_conversion;
ClientProperties arrow_options("UTC", ArrowOffsetSize::REGULAR, false, false, lossless_conversion,
ArrowFormatVersion::V1_0, connection_.context);
unordered_map<idx_t, const shared_ptr<ArrowTypeExtensionData>> extension_type_cast;
arrow_options.arrow_offset_size = ArrowOffsetSize::REGULAR;
ArrowConverter::ToArrowArray(*chunk, &array, arrow_options, extension_type_cast);
Expand Down
2 changes: 1 addition & 1 deletion packages/duckdb-wasm-app/webpack.debug.corp.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import config from './webpack.app.debug.js';
import config from './webpack.debug.js';

export default {
...config,
Expand Down
3 changes: 3 additions & 0 deletions packages/duckdb-wasm-shell/crate/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,6 @@ wasm-bindgen-test = "0.3.12"
[profile.release]
lto = true
opt-level = "s"

[package.metadata.wasm-pack.profile.release]
wasm-opt = false
2 changes: 1 addition & 1 deletion packages/duckdb-wasm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ const MANUAL_BUNDLES: duckdb.DuckDBBundles = {
},
};
// Select a bundle based on browser checks
const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);
const bundle = await duckdb.selectBundle(MANUAL_BUNDLES);
// Instantiate the asynchronous version of DuckDB-wasm
const worker = new Worker(bundle.mainWorker!);
const logger = new duckdb.ConsoleLogger();
Expand Down
2 changes: 1 addition & 1 deletion packages/duckdb-wasm/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"jasmine": "^5.1.0",
"jasmine-core": "^5.1.2",
"jasmine-spec-reporter": "^7.0.0",
"js-sha256": "^0.11.0",
"js-sha256": "^0.11.1",
"karma": "^6.4.2",
"karma-chrome-launcher": "^3.2.0",
"karma-coverage": "^2.2.1",
Expand Down
11 changes: 0 additions & 11 deletions packages/duckdb-wasm/src/utils/s3_helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,23 +143,12 @@ export function createS3Headers(params: S3Params, payloadParams: S3PayloadParams
'/aws4_request\n' +
canonicalRequestHashStr;

// ts-ignore's because library can accept array buffer as key, but TS arg is incorrect
const signKey = 'AWS4' + params.secretAccessKey;
const kDate = sha256.hmac.arrayBuffer(signKey, params.dateNow);

// Note, js-sha256 has a bug in the TS interface that only supports strings as keys, while we need a bytearray
// as key. PR is open but unmerged: https://github.com/emn178/js-sha256/pull/25
// eslint-disable-next-line
// @ts-ignore
const kRegion = sha256.hmac.arrayBuffer(kDate, params.region);
// eslint-disable-next-line
// @ts-ignore
const kService = sha256.hmac.arrayBuffer(kRegion, params.service);
// eslint-disable-next-line
// @ts-ignore
const signingKey = sha256.hmac.arrayBuffer(kService, 'aws4_request');
// eslint-disable-next-line
// @ts-ignore
const signature = sha256.hmac(signingKey, stringToSign);

res.set(
Expand Down
2 changes: 2 additions & 0 deletions packages/duckdb-wasm/test/index_browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ import { testUDF } from './udf.test';
import { longQueries } from './long_queries.test';
//import { testEXCEL } from './excel.test';
//import { testJSON } from './json.test';
import { testPivot } from './pivot.test';

const baseURL = window.location.origin;
const dataURL = `${baseURL}/data`;
Expand Down Expand Up @@ -140,3 +141,4 @@ testTokenization(() => db!);
testTokenizationAsync(() => adb!);
//testEXCEL(() => db!);
//testJSON(() => db!);
testPivot(() => db!);
2 changes: 2 additions & 0 deletions packages/duckdb-wasm/test/index_node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ import { testUDF } from './udf.test';
import { longQueries } from './long_queries.test';
import { testRegressionAsync } from './regression';
import { testFTS } from './fts.test';
import { testPivot } from './pivot.test';

testUDF(() => db!);
longQueries(() => adb!);
Expand All @@ -101,3 +102,4 @@ testCSVInsertAsync(() => adb!);
testTokenization(() => db!);
testTokenizationAsync(() => adb!);
testFTS(() => db!);
testPivot(() => db!, { skipValuesCheck: true });
56 changes: 56 additions & 0 deletions packages/duckdb-wasm/test/pivot.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import * as duckdb from '../src/';

export function testPivot(db: () => duckdb.DuckDBBindings, options?: { skipValuesCheck: boolean }): void {
let conn: duckdb.DuckDBConnection;
beforeEach(() => {
conn = db().connect();
});

afterEach(() => {
conn.close();
db().flushFiles();
db().dropFiles();
});

describe('PIVOT', () => {
it('with send', async () => {
conn.query(`
CREATE TABLE cities (
country VARCHAR, name VARCHAR, year INTEGER, population INTEGER
);`);
conn.query(`
INSERT INTO cities VALUES
('NL', 'Amsterdam', 2000, 1005),
('NL', 'Amsterdam', 2010, 1065),
('NL', 'Amsterdam', 2020, 1158),
('US', 'Seattle', 2000, 564),
('US', 'Seattle', 2010, 608),
('US', 'Seattle', 2020, 738),
('US', 'New York City', 2000, 8015),
('US', 'New York City', 2010, 8175),
('US', 'New York City', 2020, 8772);`);

const reader = await conn.send(`PIVOT cities ON year USING sum(population);`);
const batches = reader.readAll();
expect(batches.length).toBe(1);
const batch = batches[0];
expect(batch.numCols).toBe(5);
expect(batch.numRows).toBe(3);
expect(batch.getChildAt(0)?.toArray()).toEqual(['NL', 'US', 'US']);
expect(batch.getChildAt(1)?.toArray()).toEqual(['Amsterdam', 'Seattle', 'New York City']);
// On Node, the types of these columns are inconsistent in different builds, so we skip the check.
if (!options?.skipValuesCheck) {
// Pivoted columns are int128
expect(batch.getChildAt(2)?.toArray()).toEqual(
new Uint32Array([1005, 0, 0, 0, 564, 0, 0, 0, 8015, 0, 0, 0]),
);
expect(batch.getChildAt(3)?.toArray()).toEqual(
new Uint32Array([1065, 0, 0, 0, 608, 0, 0, 0, 8175, 0, 0, 0]),
);
expect(batch.getChildAt(4)?.toArray()).toEqual(
new Uint32Array([1158, 0, 0, 0, 738, 0, 0, 0, 8772, 0, 0, 0]),
);
}
});
});
}
52 changes: 27 additions & 25 deletions patches/duckdb/extension_install_rework.patch
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
diff --git a/src/include/duckdb/main/database.hpp b/src/include/duckdb/main/database.hpp
index ed956daa64..d4774faac5 100644
index d3c5fb9bd5..b3d0aaa09e 100644
--- a/src/include/duckdb/main/database.hpp
+++ b/src/include/duckdb/main/database.hpp
@@ -97,6 +97,10 @@ private:
shared_ptr<LogManager> log_manager;
@@ -100,6 +100,10 @@ private:
unique_ptr<ExternalFileCache> external_file_cache;

duckdb_ext_api_v1 (*create_api_v1)();
+public:
Expand Down Expand Up @@ -31,10 +31,10 @@ index 6ccd1a1156..8040f537b6 100644
//! Debugging repositories (target local, relative paths that are produced by DuckDB's build system)
static constexpr const char *BUILD_DEBUG_REPOSITORY_PATH = "./build/debug/repository";
diff --git a/src/main/database.cpp b/src/main/database.cpp
index 084dab6f30..6e21356bd9 100644
index db6e1ed445..d495aab058 100644
--- a/src/main/database.cpp
+++ b/src/main/database.cpp
@@ -344,6 +344,28 @@ DuckDB::DuckDB(DatabaseInstance &instance_p) : instance(instance_p.shared_from_t
@@ -356,6 +356,28 @@ DuckDB::DuckDB(DatabaseInstance &instance_p) : instance(instance_p.shared_from_t
DuckDB::~DuckDB() {
}

Expand Down Expand Up @@ -63,19 +63,30 @@ index 084dab6f30..6e21356bd9 100644
SecretManager &DatabaseInstance::GetSecretManager() {
return *config.secret_manager;
}
@@ -507,6 +529,10 @@ idx_t DuckDB::NumberOfThreads() {

bool DatabaseInstance::ExtensionIsLoaded(const std::string &name) {
auto extension_name = ExtensionHelper::GetExtensionName(name);
+ if (extension_name == "httpfs") {
+ ExtensionInstallInfo info;
+ SetExtensionLoaded(extension_name, info);
+ }
auto it = loaded_extensions_info.find(extension_name);
return it != loaded_extensions_info.end() && it->second.is_loaded;
}
diff --git a/src/main/extension/extension_helper.cpp b/src/main/extension/extension_helper.cpp
index c7b613226a..00885ee6a4 100644
index 3aaf507faa..be084ef030 100644
--- a/src/main/extension/extension_helper.cpp
+++ b/src/main/extension/extension_helper.cpp
@@ -326,7 +326,6 @@ vector<ExtensionUpdateResult> ExtensionHelper::UpdateExtensions(ClientContext &c
@@ -337,7 +337,6 @@ vector<ExtensionUpdateResult> ExtensionHelper::UpdateExtensions(ClientContext &c
vector<ExtensionUpdateResult> result;
DatabaseInstance &db = DatabaseInstance::GetDatabase(context);

-#ifndef WASM_LOADABLE_EXTENSIONS
case_insensitive_set_t seen_extensions;

// scan the install directory for installed extensions
@@ -343,7 +342,6 @@ vector<ExtensionUpdateResult> ExtensionHelper::UpdateExtensions(ClientContext &c
@@ -354,7 +353,6 @@ vector<ExtensionUpdateResult> ExtensionHelper::UpdateExtensions(ClientContext &c

result.push_back(UpdateExtensionInternal(context, db, fs, fs.JoinPath(ext_directory, path), extension_name));
});
Expand All @@ -84,10 +95,10 @@ index c7b613226a..00885ee6a4 100644
return result;
}
diff --git a/src/main/extension/extension_install.cpp b/src/main/extension/extension_install.cpp
index e8ab595ab0..fb3e6371a3 100644
index 2ea03b8e49..d8c710f153 100644
--- a/src/main/extension/extension_install.cpp
+++ b/src/main/extension/extension_install.cpp
@@ -155,6 +155,9 @@ bool ExtensionHelper::CreateSuggestions(const string &extension_name, string &me
@@ -145,6 +145,9 @@ bool ExtensionHelper::CreateSuggestions(const string &extension_name, string &me
unique_ptr<ExtensionInstallInfo> ExtensionHelper::InstallExtension(DatabaseInstance &db, FileSystem &fs,
const string &extension,
ExtensionInstallOptions &options) {
Expand All @@ -97,7 +108,7 @@ index e8ab595ab0..fb3e6371a3 100644
#ifdef WASM_LOADABLE_EXTENSIONS
// Install is currently a no-op
return nullptr;
@@ -165,6 +168,9 @@ unique_ptr<ExtensionInstallInfo> ExtensionHelper::InstallExtension(DatabaseInsta
@@ -155,6 +158,9 @@ unique_ptr<ExtensionInstallInfo> ExtensionHelper::InstallExtension(DatabaseInsta

unique_ptr<ExtensionInstallInfo> ExtensionHelper::InstallExtension(ClientContext &context, const string &extension,
ExtensionInstallOptions &options) {
Expand All @@ -107,20 +118,11 @@ index e8ab595ab0..fb3e6371a3 100644
#ifdef WASM_LOADABLE_EXTENSIONS
// Install is currently a no-op
return nullptr;
@@ -209,7 +215,7 @@ string ExtensionHelper::ExtensionUrlTemplate(optional_ptr<const DatabaseInstance
versioned_path = "/${REVISION}/${PLATFORM}/${NAME}.duckdb_extension";
}
#ifdef WASM_LOADABLE_EXTENSIONS
- string default_endpoint = DEFAULT_REPOSITORY;
+ string default_endpoint = ExtensionRepository::DEFAULT_REPOSITORY_URL;
versioned_path = versioned_path + ".wasm";
#else
string default_endpoint = ExtensionRepository::DEFAULT_REPOSITORY_URL;
diff --git a/src/main/extension/extension_load.cpp b/src/main/extension/extension_load.cpp
index 84b28fef09..ea2ebba1cd 100644
index 6e4bb18796..188931a84d 100644
--- a/src/main/extension/extension_load.cpp
+++ b/src/main/extension/extension_load.cpp
@@ -328,7 +328,20 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const
@@ -327,7 +327,20 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const
direct_load = false;
string extension_name = ApplyExtensionAlias(extension);
#ifdef WASM_LOADABLE_EXTENSIONS
Expand All @@ -142,7 +144,7 @@ index 84b28fef09..ea2ebba1cd 100644
string url = ExtensionFinalizeUrlTemplate(url_template, extension_name);

char *str = (char *)EM_ASM_PTR(
@@ -369,35 +382,31 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const
@@ -368,35 +381,31 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const
direct_load = true;
filename = fs.ExpandPath(filename);
}
Expand Down Expand Up @@ -198,7 +200,7 @@ index 84b28fef09..ea2ebba1cd 100644

if (!metadata_mismatch_error.empty()) {
throw InvalidInputException(metadata_mismatch_error);
@@ -414,26 +423,192 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const
@@ -413,26 +422,192 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const
}
}

Expand Down Expand Up @@ -401,7 +403,7 @@ index 84b28fef09..ea2ebba1cd 100644
#else
auto dopen_from = filename;
#endif
@@ -449,28 +624,30 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const
@@ -448,28 +623,30 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const
result.filebase = lowercase_extension_name;
result.filename = filename;
result.lib_hdl = lib_hdl;
Expand Down
Loading