diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index fd21694d7..ac20b25d8 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -82,6 +82,8 @@ endif () set(DUCKDB_PLATFORM "wasm_mvp") if(EMSCRIPTEN) + # Bump stack size from default 64KB to 1MB + set(WASM_LINK_FLAGS "${WASM_LINK_FLAGS} -sSTACK_SIZE=1048576") # Release build if(CMAKE_BUILD_TYPE STREQUAL "Release") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG") diff --git a/lib/include/duckdb/web/webdb.h b/lib/include/duckdb/web/webdb.h index 0cd847169..5541e8a65 100644 --- a/lib/include/duckdb/web/webdb.h +++ b/lib/include/duckdb/web/webdb.h @@ -57,6 +57,12 @@ class WebDB { /// The connection duckdb::Connection connection_; + /// The statements extracted from the text passed to PendingQuery + std::vector> current_pending_statements_; + /// The index of the currently-running statement (in the above list) + size_t current_pending_statement_index_ = 0; + /// The value of allow_stream_result passed to PendingQuery + bool current_allow_stream_result_ = false; /// The current pending query result (if any) duckdb::unique_ptr current_pending_query_result_ = nullptr; /// The current pending query was canceled diff --git a/lib/src/webdb.cc b/lib/src/webdb.cc index 71641ec2f..88aaa1bb8 100644 --- a/lib/src/webdb.cc +++ b/lib/src/webdb.cc @@ -106,7 +106,9 @@ arrow::Result> WebDB::Connection::MaterializeQuer // Configure the output writer ArrowSchema raw_schema; - ClientProperties options("UTC", ArrowOffsetSize::REGULAR, false, false, false, connection_.context); + bool lossless_conversion = webdb_.config_->arrow_lossless_conversion; + ClientProperties options("UTC", ArrowOffsetSize::REGULAR, false, false, lossless_conversion, + ArrowFormatVersion::V1_0, connection_.context); unordered_map> extension_type_cast; options.arrow_offset_size = ArrowOffsetSize::REGULAR; ArrowConverter::ToArrowSchema(&raw_schema, result->types, result->names, options); @@ -142,7 +144,9 @@ arrow::Result> WebDB::Connection::StreamQueryResu // Import the schema ArrowSchema raw_schema; - ClientProperties options("UTC", ArrowOffsetSize::REGULAR, false, false, false, connection_.context); + bool lossless_conversion = webdb_.config_->arrow_lossless_conversion; + ClientProperties options("UTC", ArrowOffsetSize::REGULAR, false, false, lossless_conversion, + ArrowFormatVersion::V1_0, connection_.context); options.arrow_offset_size = ArrowOffsetSize::REGULAR; ArrowConverter::ToArrowSchema(&raw_schema, current_query_result_->types, current_query_result_->names, options); ARROW_ASSIGN_OR_RAISE(current_schema_, arrow::ImportSchema(&raw_schema)); @@ -170,9 +174,20 @@ arrow::Result> WebDB::Connection::RunQuery(std::s arrow::Result> WebDB::Connection::PendingQuery(std::string_view text, bool allow_stream_result) { try { - // Send the query - auto result = connection_.PendingQuery(std::string{text}, allow_stream_result); - if (result->HasError()) return arrow::Status{arrow::StatusCode::ExecutionError, std::move(result->GetError())}; + auto statements = connection_.ExtractStatements(std::string{text}); + if (statements.size() == 0) { + return arrow::Status{arrow::StatusCode::ExecutionError, "no statements"}; + } + current_pending_statements_ = std::move(statements); + current_pending_statement_index_ = 0; + current_allow_stream_result_ = allow_stream_result; + // Send the first query + auto result = connection_.PendingQuery(std::move(current_pending_statements_[current_pending_statement_index_]), + current_allow_stream_result_); + if (result->HasError()) { + current_pending_statements_.clear(); + return arrow::Status{arrow::StatusCode::ExecutionError, std::move(result->GetError())}; + } current_pending_query_result_ = std::move(result); current_pending_query_was_canceled_ = false; current_query_result_.reset(); @@ -202,8 +217,25 @@ arrow::Result> WebDB::Connection::PollPendingQuer do { switch (current_pending_query_result_->ExecuteTask()) { case PendingExecutionResult::EXECUTION_FINISHED: - case PendingExecutionResult::RESULT_READY: - return StreamQueryResult(current_pending_query_result_->Execute()); + case PendingExecutionResult::RESULT_READY: { + auto result = current_pending_query_result_->Execute(); + current_pending_statement_index_++; + // If this was the last statement, then return the result + if (current_pending_statement_index_ == current_pending_statements_.size()) { + return StreamQueryResult(std::move(result)); + } + // Otherwise, start the next statement + auto pending_result = + connection_.PendingQuery(std::move(current_pending_statements_[current_pending_statement_index_]), + current_allow_stream_result_); + if (pending_result->HasError()) { + current_pending_query_result_.reset(); + current_pending_statements_.clear(); + return arrow::Status{arrow::StatusCode::ExecutionError, std::move(pending_result->GetError())}; + } + current_pending_query_result_ = std::move(pending_result); + break; + } case PendingExecutionResult::BLOCKED: case PendingExecutionResult::NO_TASKS_AVAILABLE: return nullptr; @@ -212,6 +244,7 @@ arrow::Result> WebDB::Connection::PollPendingQuer case PendingExecutionResult::EXECUTION_ERROR: { auto err = current_pending_query_result_->GetError(); current_pending_query_result_.reset(); + current_pending_statements_.clear(); return arrow::Status{arrow::StatusCode::ExecutionError, err}; } } @@ -226,6 +259,7 @@ bool WebDB::Connection::CancelPendingQuery() { if (current_pending_query_result_ != nullptr && current_query_result_ == nullptr) { current_pending_query_was_canceled_ = true; current_pending_query_result_.reset(); + current_pending_statements_.clear(); return true; } else { return false; @@ -294,7 +328,9 @@ DuckDBWasmResultsWrapper WebDB::Connection::FetchQueryResults() { // Serialize the record batch ArrowArray array; - ClientProperties arrow_options("UTC", ArrowOffsetSize::REGULAR, false, false, false, connection_.context); + bool lossless_conversion = webdb_.config_->arrow_lossless_conversion; + ClientProperties arrow_options("UTC", ArrowOffsetSize::REGULAR, false, false, lossless_conversion, + ArrowFormatVersion::V1_0, connection_.context); unordered_map> extension_type_cast; arrow_options.arrow_offset_size = ArrowOffsetSize::REGULAR; ArrowConverter::ToArrowArray(*chunk, &array, arrow_options, extension_type_cast); diff --git a/packages/duckdb-wasm-app/webpack.debug.corp.js b/packages/duckdb-wasm-app/webpack.debug.corp.js index 570b06743..09d3e809f 100644 --- a/packages/duckdb-wasm-app/webpack.debug.corp.js +++ b/packages/duckdb-wasm-app/webpack.debug.corp.js @@ -1,4 +1,4 @@ -import config from './webpack.app.debug.js'; +import config from './webpack.debug.js'; export default { ...config, diff --git a/packages/duckdb-wasm-shell/crate/Cargo.toml b/packages/duckdb-wasm-shell/crate/Cargo.toml index 92dbabc55..5b68e5a5c 100644 --- a/packages/duckdb-wasm-shell/crate/Cargo.toml +++ b/packages/duckdb-wasm-shell/crate/Cargo.toml @@ -54,3 +54,6 @@ wasm-bindgen-test = "0.3.12" [profile.release] lto = true opt-level = "s" + +[package.metadata.wasm-pack.profile.release] +wasm-opt = false diff --git a/packages/duckdb-wasm/README.md b/packages/duckdb-wasm/README.md index 6b7a008bf..2a5ddcb91 100644 --- a/packages/duckdb-wasm/README.md +++ b/packages/duckdb-wasm/README.md @@ -100,7 +100,7 @@ const MANUAL_BUNDLES: duckdb.DuckDBBundles = { }, }; // Select a bundle based on browser checks -const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES); +const bundle = await duckdb.selectBundle(MANUAL_BUNDLES); // Instantiate the asynchronous version of DuckDB-wasm const worker = new Worker(bundle.mainWorker!); const logger = new duckdb.ConsoleLogger(); diff --git a/packages/duckdb-wasm/package.json b/packages/duckdb-wasm/package.json index 9931a2401..a5c73773a 100644 --- a/packages/duckdb-wasm/package.json +++ b/packages/duckdb-wasm/package.json @@ -38,7 +38,7 @@ "jasmine": "^5.1.0", "jasmine-core": "^5.1.2", "jasmine-spec-reporter": "^7.0.0", - "js-sha256": "^0.11.0", + "js-sha256": "^0.11.1", "karma": "^6.4.2", "karma-chrome-launcher": "^3.2.0", "karma-coverage": "^2.2.1", diff --git a/packages/duckdb-wasm/src/utils/s3_helper.ts b/packages/duckdb-wasm/src/utils/s3_helper.ts index 72390a9e6..46de8456f 100644 --- a/packages/duckdb-wasm/src/utils/s3_helper.ts +++ b/packages/duckdb-wasm/src/utils/s3_helper.ts @@ -143,23 +143,12 @@ export function createS3Headers(params: S3Params, payloadParams: S3PayloadParams '/aws4_request\n' + canonicalRequestHashStr; - // ts-ignore's because library can accept array buffer as key, but TS arg is incorrect const signKey = 'AWS4' + params.secretAccessKey; const kDate = sha256.hmac.arrayBuffer(signKey, params.dateNow); - // Note, js-sha256 has a bug in the TS interface that only supports strings as keys, while we need a bytearray - // as key. PR is open but unmerged: https://github.com/emn178/js-sha256/pull/25 - // eslint-disable-next-line - // @ts-ignore const kRegion = sha256.hmac.arrayBuffer(kDate, params.region); - // eslint-disable-next-line - // @ts-ignore const kService = sha256.hmac.arrayBuffer(kRegion, params.service); - // eslint-disable-next-line - // @ts-ignore const signingKey = sha256.hmac.arrayBuffer(kService, 'aws4_request'); - // eslint-disable-next-line - // @ts-ignore const signature = sha256.hmac(signingKey, stringToSign); res.set( diff --git a/packages/duckdb-wasm/test/index_browser.ts b/packages/duckdb-wasm/test/index_browser.ts index 1588c4fdd..b3959257d 100644 --- a/packages/duckdb-wasm/test/index_browser.ts +++ b/packages/duckdb-wasm/test/index_browser.ts @@ -111,6 +111,7 @@ import { testUDF } from './udf.test'; import { longQueries } from './long_queries.test'; //import { testEXCEL } from './excel.test'; //import { testJSON } from './json.test'; +import { testPivot } from './pivot.test'; const baseURL = window.location.origin; const dataURL = `${baseURL}/data`; @@ -140,3 +141,4 @@ testTokenization(() => db!); testTokenizationAsync(() => adb!); //testEXCEL(() => db!); //testJSON(() => db!); +testPivot(() => db!); diff --git a/packages/duckdb-wasm/test/index_node.ts b/packages/duckdb-wasm/test/index_node.ts index d170ac88f..a2edc6375 100644 --- a/packages/duckdb-wasm/test/index_node.ts +++ b/packages/duckdb-wasm/test/index_node.ts @@ -79,6 +79,7 @@ import { testUDF } from './udf.test'; import { longQueries } from './long_queries.test'; import { testRegressionAsync } from './regression'; import { testFTS } from './fts.test'; +import { testPivot } from './pivot.test'; testUDF(() => db!); longQueries(() => adb!); @@ -101,3 +102,4 @@ testCSVInsertAsync(() => adb!); testTokenization(() => db!); testTokenizationAsync(() => adb!); testFTS(() => db!); +testPivot(() => db!, { skipValuesCheck: true }); diff --git a/packages/duckdb-wasm/test/pivot.test.ts b/packages/duckdb-wasm/test/pivot.test.ts new file mode 100644 index 000000000..1c16e227b --- /dev/null +++ b/packages/duckdb-wasm/test/pivot.test.ts @@ -0,0 +1,56 @@ +import * as duckdb from '../src/'; + +export function testPivot(db: () => duckdb.DuckDBBindings, options?: { skipValuesCheck: boolean }): void { + let conn: duckdb.DuckDBConnection; + beforeEach(() => { + conn = db().connect(); + }); + + afterEach(() => { + conn.close(); + db().flushFiles(); + db().dropFiles(); + }); + + describe('PIVOT', () => { + it('with send', async () => { + conn.query(` +CREATE TABLE cities ( + country VARCHAR, name VARCHAR, year INTEGER, population INTEGER +);`); + conn.query(` +INSERT INTO cities VALUES + ('NL', 'Amsterdam', 2000, 1005), + ('NL', 'Amsterdam', 2010, 1065), + ('NL', 'Amsterdam', 2020, 1158), + ('US', 'Seattle', 2000, 564), + ('US', 'Seattle', 2010, 608), + ('US', 'Seattle', 2020, 738), + ('US', 'New York City', 2000, 8015), + ('US', 'New York City', 2010, 8175), + ('US', 'New York City', 2020, 8772);`); + + const reader = await conn.send(`PIVOT cities ON year USING sum(population);`); + const batches = reader.readAll(); + expect(batches.length).toBe(1); + const batch = batches[0]; + expect(batch.numCols).toBe(5); + expect(batch.numRows).toBe(3); + expect(batch.getChildAt(0)?.toArray()).toEqual(['NL', 'US', 'US']); + expect(batch.getChildAt(1)?.toArray()).toEqual(['Amsterdam', 'Seattle', 'New York City']); + // On Node, the types of these columns are inconsistent in different builds, so we skip the check. + if (!options?.skipValuesCheck) { + // Pivoted columns are int128 + expect(batch.getChildAt(2)?.toArray()).toEqual( + new Uint32Array([1005, 0, 0, 0, 564, 0, 0, 0, 8015, 0, 0, 0]), + ); + expect(batch.getChildAt(3)?.toArray()).toEqual( + new Uint32Array([1065, 0, 0, 0, 608, 0, 0, 0, 8175, 0, 0, 0]), + ); + expect(batch.getChildAt(4)?.toArray()).toEqual( + new Uint32Array([1158, 0, 0, 0, 738, 0, 0, 0, 8772, 0, 0, 0]), + ); + } + }); + }); +} diff --git a/patches/duckdb/extension_install_rework.patch b/patches/duckdb/extension_install_rework.patch index 43e015f44..b1b49f367 100644 --- a/patches/duckdb/extension_install_rework.patch +++ b/patches/duckdb/extension_install_rework.patch @@ -1,9 +1,9 @@ diff --git a/src/include/duckdb/main/database.hpp b/src/include/duckdb/main/database.hpp -index ed956daa64..d4774faac5 100644 +index d3c5fb9bd5..b3d0aaa09e 100644 --- a/src/include/duckdb/main/database.hpp +++ b/src/include/duckdb/main/database.hpp -@@ -97,6 +97,10 @@ private: - shared_ptr log_manager; +@@ -100,6 +100,10 @@ private: + unique_ptr external_file_cache; duckdb_ext_api_v1 (*create_api_v1)(); +public: @@ -31,10 +31,10 @@ index 6ccd1a1156..8040f537b6 100644 //! Debugging repositories (target local, relative paths that are produced by DuckDB's build system) static constexpr const char *BUILD_DEBUG_REPOSITORY_PATH = "./build/debug/repository"; diff --git a/src/main/database.cpp b/src/main/database.cpp -index 084dab6f30..6e21356bd9 100644 +index db6e1ed445..d495aab058 100644 --- a/src/main/database.cpp +++ b/src/main/database.cpp -@@ -344,6 +344,28 @@ DuckDB::DuckDB(DatabaseInstance &instance_p) : instance(instance_p.shared_from_t +@@ -356,6 +356,28 @@ DuckDB::DuckDB(DatabaseInstance &instance_p) : instance(instance_p.shared_from_t DuckDB::~DuckDB() { } @@ -63,11 +63,22 @@ index 084dab6f30..6e21356bd9 100644 SecretManager &DatabaseInstance::GetSecretManager() { return *config.secret_manager; } +@@ -507,6 +529,10 @@ idx_t DuckDB::NumberOfThreads() { + + bool DatabaseInstance::ExtensionIsLoaded(const std::string &name) { + auto extension_name = ExtensionHelper::GetExtensionName(name); ++ if (extension_name == "httpfs") { ++ ExtensionInstallInfo info; ++ SetExtensionLoaded(extension_name, info); ++ } + auto it = loaded_extensions_info.find(extension_name); + return it != loaded_extensions_info.end() && it->second.is_loaded; + } diff --git a/src/main/extension/extension_helper.cpp b/src/main/extension/extension_helper.cpp -index c7b613226a..00885ee6a4 100644 +index 3aaf507faa..be084ef030 100644 --- a/src/main/extension/extension_helper.cpp +++ b/src/main/extension/extension_helper.cpp -@@ -326,7 +326,6 @@ vector ExtensionHelper::UpdateExtensions(ClientContext &c +@@ -337,7 +337,6 @@ vector ExtensionHelper::UpdateExtensions(ClientContext &c vector result; DatabaseInstance &db = DatabaseInstance::GetDatabase(context); @@ -75,7 +86,7 @@ index c7b613226a..00885ee6a4 100644 case_insensitive_set_t seen_extensions; // scan the install directory for installed extensions -@@ -343,7 +342,6 @@ vector ExtensionHelper::UpdateExtensions(ClientContext &c +@@ -354,7 +353,6 @@ vector ExtensionHelper::UpdateExtensions(ClientContext &c result.push_back(UpdateExtensionInternal(context, db, fs, fs.JoinPath(ext_directory, path), extension_name)); }); @@ -84,10 +95,10 @@ index c7b613226a..00885ee6a4 100644 return result; } diff --git a/src/main/extension/extension_install.cpp b/src/main/extension/extension_install.cpp -index e8ab595ab0..fb3e6371a3 100644 +index 2ea03b8e49..d8c710f153 100644 --- a/src/main/extension/extension_install.cpp +++ b/src/main/extension/extension_install.cpp -@@ -155,6 +155,9 @@ bool ExtensionHelper::CreateSuggestions(const string &extension_name, string &me +@@ -145,6 +145,9 @@ bool ExtensionHelper::CreateSuggestions(const string &extension_name, string &me unique_ptr ExtensionHelper::InstallExtension(DatabaseInstance &db, FileSystem &fs, const string &extension, ExtensionInstallOptions &options) { @@ -97,7 +108,7 @@ index e8ab595ab0..fb3e6371a3 100644 #ifdef WASM_LOADABLE_EXTENSIONS // Install is currently a no-op return nullptr; -@@ -165,6 +168,9 @@ unique_ptr ExtensionHelper::InstallExtension(DatabaseInsta +@@ -155,6 +158,9 @@ unique_ptr ExtensionHelper::InstallExtension(DatabaseInsta unique_ptr ExtensionHelper::InstallExtension(ClientContext &context, const string &extension, ExtensionInstallOptions &options) { @@ -107,20 +118,11 @@ index e8ab595ab0..fb3e6371a3 100644 #ifdef WASM_LOADABLE_EXTENSIONS // Install is currently a no-op return nullptr; -@@ -209,7 +215,7 @@ string ExtensionHelper::ExtensionUrlTemplate(optional_ptr DatabaseInstance::extensionsRepos = {}; -+ -+void DatabaseInstance::SetPreferredRepository(const string& extension, const string &repository) { -+ auto &x = extensionsRepos; -+ auto it = x.find(extension); -+ if (it != x.end()) { -+ it->second=repository; -+ } else { -+ x.emplace(extension, repository); -+ } -+} -+ -+string DatabaseInstance::GetPreferredRepository(const string& extension) { -+ const auto &x = extensionsRepos; -+ auto it = x.find(extension); -+ if (it != x.end()) { -+ return it->second; -+ } -+ return ""; -+} -+ -+ - SecretManager &DatabaseInstance::GetSecretManager() { - return *config.secret_manager; - } -@@ -506,6 +528,7 @@ idx_t DuckDB::NumberOfThreads() { - } - - bool DatabaseInstance::ExtensionIsLoaded(const std::string &name) { -+ if (name == "httpfs") return true; - auto extension_name = ExtensionHelper::GetExtensionName(name); - auto it = loaded_extensions_info.find(extension_name); - return it != loaded_extensions_info.end() && it->second.is_loaded; diff --git a/patches/duckdb/revert_arrow_decimal_types.patch b/patches/duckdb/revert_arrow_decimal_types.patch deleted file mode 100644 index 072bb79a9..000000000 --- a/patches/duckdb/revert_arrow_decimal_types.patch +++ /dev/null @@ -1,538 +0,0 @@ -diff --git a/src/common/arrow/arrow_appender.cpp b/src/common/arrow/arrow_appender.cpp -index fd3ae221e3..ee49bdd85c 100644 ---- a/src/common/arrow/arrow_appender.cpp -+++ b/src/common/arrow/arrow_appender.cpp -@@ -215,13 +215,13 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic - case LogicalTypeId::DECIMAL: - switch (type.InternalType()) { - case PhysicalType::INT16: -- InitializeAppenderForType>(append_data); -+ InitializeAppenderForType>(append_data); - break; - case PhysicalType::INT32: -- InitializeAppenderForType>(append_data); -+ InitializeAppenderForType>(append_data); - break; - case PhysicalType::INT64: -- InitializeAppenderForType>(append_data); -+ InitializeAppenderForType>(append_data); - break; - case PhysicalType::INT128: - InitializeAppenderForType>(append_data); -diff --git a/src/common/arrow/arrow_converter.cpp b/src/common/arrow/arrow_converter.cpp -index 38cae380de..5369b3d423 100644 ---- a/src/common/arrow/arrow_converter.cpp -+++ b/src/common/arrow/arrow_converter.cpp -@@ -232,24 +232,9 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co - child.format = "tin"; - break; - case LogicalTypeId::DECIMAL: { -- uint8_t width, scale, bit_width; -- switch (type.InternalType()) { -- case PhysicalType::INT16: -- case PhysicalType::INT32: -- bit_width = 32; -- break; -- case PhysicalType::INT64: -- bit_width = 64; -- break; -- case PhysicalType::INT128: -- bit_width = 128; -- break; -- default: -- throw NotImplementedException("Unsupported internal type For DUCKDB Decimal -> Arrow "); -- } -- -+ uint8_t width, scale; - type.GetDecimalProperties(width, scale); -- string format = "d:" + to_string(width) + "," + to_string(scale) + "," + to_string(bit_width); -+ string format = "d:" + to_string(width) + "," + to_string(scale); - root_holder.owned_type_names.push_back(AddName(format)); - child.format = root_holder.owned_type_names.back().get(); - break; -diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp -index 7661e898bf..bbd9150453 100644 ---- a/src/common/enum_util.cpp -+++ b/src/common/enum_util.cpp -@@ -100,7 +100,6 @@ - #include "duckdb/function/partition_stats.hpp" - #include "duckdb/function/scalar/compressed_materialization_utils.hpp" - #include "duckdb/function/scalar/strftime_format.hpp" --#include "duckdb/function/table/arrow/arrow_type_info.hpp" - #include "duckdb/function/table/arrow/enum/arrow_datetime_type.hpp" - #include "duckdb/function/table/arrow/enum/arrow_type_info_type.hpp" - #include "duckdb/function/table/arrow/enum/arrow_variable_size_type.hpp" -@@ -519,20 +518,19 @@ const StringUtil::EnumStringLiteral *GetArrowTypeInfoTypeValues() { - { static_cast(ArrowTypeInfoType::STRUCT), "STRUCT" }, - { static_cast(ArrowTypeInfoType::DATE_TIME), "DATE_TIME" }, - { static_cast(ArrowTypeInfoType::STRING), "STRING" }, -- { static_cast(ArrowTypeInfoType::ARRAY), "ARRAY" }, -- { static_cast(ArrowTypeInfoType::DECIMAL), "DECIMAL" } -+ { static_cast(ArrowTypeInfoType::ARRAY), "ARRAY" } - }; - return values; - } - - template<> - const char* EnumUtil::ToChars(ArrowTypeInfoType value) { -- return StringUtil::EnumToString(GetArrowTypeInfoTypeValues(), 6, "ArrowTypeInfoType", static_cast(value)); -+ return StringUtil::EnumToString(GetArrowTypeInfoTypeValues(), 5, "ArrowTypeInfoType", static_cast(value)); - } - - template<> - ArrowTypeInfoType EnumUtil::FromString(const char *value) { -- return static_cast(StringUtil::StringToEnum(GetArrowTypeInfoTypeValues(), 6, "ArrowTypeInfoType", value)); -+ return static_cast(StringUtil::StringToEnum(GetArrowTypeInfoTypeValues(), 5, "ArrowTypeInfoType", value)); - } - - const StringUtil::EnumStringLiteral *GetArrowVariableSizeTypeValues() { -@@ -1145,26 +1143,6 @@ DebugVectorVerification EnumUtil::FromString(const char - return static_cast(StringUtil::StringToEnum(GetDebugVectorVerificationValues(), 6, "DebugVectorVerification", value)); - } - --const StringUtil::EnumStringLiteral *GetDecimalBitWidthValues() { -- static constexpr StringUtil::EnumStringLiteral values[] { -- { static_cast(DecimalBitWidth::DECIMAL_32), "DECIMAL_32" }, -- { static_cast(DecimalBitWidth::DECIMAL_64), "DECIMAL_64" }, -- { static_cast(DecimalBitWidth::DECIMAL_128), "DECIMAL_128" }, -- { static_cast(DecimalBitWidth::DECIMAL_256), "DECIMAL_256" } -- }; -- return values; --} -- --template<> --const char* EnumUtil::ToChars(DecimalBitWidth value) { -- return StringUtil::EnumToString(GetDecimalBitWidthValues(), 4, "DecimalBitWidth", static_cast(value)); --} -- --template<> --DecimalBitWidth EnumUtil::FromString(const char *value) { -- return static_cast(StringUtil::StringToEnum(GetDecimalBitWidthValues(), 4, "DecimalBitWidth", value)); --} -- - const StringUtil::EnumStringLiteral *GetDefaultOrderByNullTypeValues() { - static constexpr StringUtil::EnumStringLiteral values[] { - { static_cast(DefaultOrderByNullType::INVALID), "INVALID" }, -diff --git a/src/function/table/arrow.cpp b/src/function/table/arrow.cpp -index 8774ccff42..9d9fd22966 100644 ---- a/src/function/table/arrow.cpp -+++ b/src/function/table/arrow.cpp -@@ -241,8 +241,6 @@ bool ArrowTableFunction::ArrowPushdownType(const LogicalType &type) { - case PhysicalType::INT16: - case PhysicalType::INT32: - case PhysicalType::INT64: -- return false; -- case PhysicalType::INT128: - return true; - default: - return false; -diff --git a/src/function/table/arrow/arrow_duck_schema.cpp b/src/function/table/arrow/arrow_duck_schema.cpp -index e518f62f40..964f9d4a5e 100644 ---- a/src/function/table/arrow/arrow_duck_schema.cpp -+++ b/src/function/table/arrow/arrow_duck_schema.cpp -@@ -105,22 +105,7 @@ unique_ptr ArrowType::GetTypeFromFormat(string &format) { - if (width > 38 || bitwidth > 128) { - throw NotImplementedException("Unsupported Internal Arrow Type for Decimal %s", format); - } -- switch (bitwidth) { -- case 32: -- return make_uniq(LogicalType::DECIMAL(NumericCast(width), NumericCast(scale)), -- make_uniq(DecimalBitWidth::DECIMAL_32)); -- case 64: -- return make_uniq(LogicalType::DECIMAL(NumericCast(width), NumericCast(scale)), -- make_uniq(DecimalBitWidth::DECIMAL_64)); -- case 128: -- return make_uniq(LogicalType::DECIMAL(NumericCast(width), NumericCast(scale)), -- make_uniq(DecimalBitWidth::DECIMAL_128)); -- case 256: -- return make_uniq(LogicalType::DECIMAL(NumericCast(width), NumericCast(scale)), -- make_uniq(DecimalBitWidth::DECIMAL_256)); -- default: -- throw NotImplementedException("Unsupported bit-width value of %d for Arrow Decimal type", bitwidth); -- } -+ return make_uniq(LogicalType::DECIMAL(NumericCast(width), NumericCast(scale))); - } else if (format == "u") { - return make_uniq(LogicalType::VARCHAR, make_uniq(ArrowVariableSizeType::NORMAL)); - } else if (format == "U") { -diff --git a/src/function/table/arrow/arrow_type_info.cpp b/src/function/table/arrow/arrow_type_info.cpp -index 8552ac297d..ed9c454494 100644 ---- a/src/function/table/arrow/arrow_type_info.cpp -+++ b/src/function/table/arrow/arrow_type_info.cpp -@@ -52,21 +52,6 @@ ArrowDateTimeType ArrowDateTimeInfo::GetDateTimeType() const { - return size_type; - } - --//===--------------------------------------------------------------------===// --// ArrowDecimalInfo --//===--------------------------------------------------------------------===// -- --ArrowDecimalInfo::ArrowDecimalInfo(DecimalBitWidth bit_width) -- : ArrowTypeInfo(ArrowTypeInfoType::DECIMAL), bit_width(bit_width) { --} -- --ArrowDecimalInfo::~ArrowDecimalInfo() { --} -- --DecimalBitWidth ArrowDecimalInfo::GetBitWidth() const { -- return bit_width; --} -- - //===--------------------------------------------------------------------===// - // ArrowStringInfo - //===--------------------------------------------------------------------===// -diff --git a/src/function/table/arrow_conversion.cpp b/src/function/table/arrow_conversion.cpp -index 73a19ca8a1..3ba7d2b059 100644 ---- a/src/function/table/arrow_conversion.cpp -+++ b/src/function/table/arrow_conversion.cpp -@@ -1,5 +1,3 @@ --#include "duckdb/common/operator/cast_operators.hpp" -- - #include "duckdb/common/exception/conversion_exception.hpp" - #include "duckdb/common/limits.hpp" - #include "duckdb/common/operator/multiply.hpp" -@@ -765,83 +763,6 @@ static void ColumnArrowToDuckDBRunEndEncoded(Vector &vector, const ArrowArray &a - throw NotImplementedException("Type '%s' not implemented for RunEndEncoding", TypeIdToString(physical_type)); - } - } --template --void ConvertDecimal(SRC src_ptr, Vector &vector, ArrowArray &array, idx_t size, int64_t nested_offset, -- uint64_t parent_offset, ArrowScanLocalState &scan_state, ValidityMask &val_mask, -- DecimalBitWidth arrow_bit_width) { -- -- switch (vector.GetType().InternalType()) { -- case PhysicalType::INT16: { -- auto tgt_ptr = FlatVector::GetData(vector); -- for (idx_t row = 0; row < size; row++) { -- if (val_mask.RowIsValid(row)) { -- auto result = TryCast::Operation(src_ptr[row], tgt_ptr[row]); -- D_ASSERT(result); -- (void)result; -- } -- } -- break; -- } -- case PhysicalType::INT32: { -- if (arrow_bit_width == DecimalBitWidth::DECIMAL_32) { -- FlatVector::SetData(vector, ArrowBufferData(array, 1) + -- GetTypeIdSize(vector.GetType().InternalType()) * -- GetEffectiveOffset(array, NumericCast(parent_offset), -- scan_state, nested_offset)); -- } else { -- auto tgt_ptr = FlatVector::GetData(vector); -- for (idx_t row = 0; row < size; row++) { -- if (val_mask.RowIsValid(row)) { -- auto result = TryCast::Operation(src_ptr[row], tgt_ptr[row]); -- D_ASSERT(result); -- (void)result; -- } -- } -- } -- break; -- } -- case PhysicalType::INT64: { -- if (arrow_bit_width == DecimalBitWidth::DECIMAL_64) { -- FlatVector::SetData(vector, ArrowBufferData(array, 1) + -- GetTypeIdSize(vector.GetType().InternalType()) * -- GetEffectiveOffset(array, NumericCast(parent_offset), -- scan_state, nested_offset)); -- } else { -- auto tgt_ptr = FlatVector::GetData(vector); -- for (idx_t row = 0; row < size; row++) { -- if (val_mask.RowIsValid(row)) { -- auto result = TryCast::Operation(src_ptr[row], tgt_ptr[row]); -- D_ASSERT(result); -- (void)result; -- } -- } -- } -- break; -- } -- case PhysicalType::INT128: { -- if (arrow_bit_width == DecimalBitWidth::DECIMAL_128) { -- FlatVector::SetData(vector, ArrowBufferData(array, 1) + -- GetTypeIdSize(vector.GetType().InternalType()) * -- GetEffectiveOffset(array, NumericCast(parent_offset), -- scan_state, nested_offset)); -- } else { -- auto tgt_ptr = FlatVector::GetData(vector); -- for (idx_t row = 0; row < size; row++) { -- if (val_mask.RowIsValid(row)) { -- auto result = TryCast::Operation(src_ptr[row], tgt_ptr[row]); -- D_ASSERT(result); -- (void)result; -- } -- } -- } -- -- break; -- } -- default: -- throw NotImplementedException("Unsupported physical type for Decimal: %s", -- TypeIdToString(vector.GetType().InternalType())); -- } --} - - static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size, - const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask, -@@ -1077,32 +998,53 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca - } - case LogicalTypeId::DECIMAL: { - auto val_mask = FlatVector::Validity(vector); -- auto &datetime_info = arrow_type.GetTypeInfo(); -- auto bit_width = datetime_info.GetBitWidth(); -- -- switch (bit_width) { -- case DecimalBitWidth::DECIMAL_32: { -- auto src_ptr = ArrowBufferData(array, 1) + -- GetEffectiveOffset(array, NumericCast(parent_offset), scan_state, nested_offset); -- ConvertDecimal(src_ptr, vector, array, size, nested_offset, parent_offset, scan_state, val_mask, bit_width); -+ //! We have to convert from INT128 -+ auto src_ptr = ArrowBufferData(array, 1) + -+ GetEffectiveOffset(array, NumericCast(parent_offset), scan_state, nested_offset); -+ switch (vector.GetType().InternalType()) { -+ case PhysicalType::INT16: { -+ auto tgt_ptr = FlatVector::GetData(vector); -+ for (idx_t row = 0; row < size; row++) { -+ if (val_mask.RowIsValid(row)) { -+ auto result = Hugeint::TryCast(src_ptr[row], tgt_ptr[row]); -+ D_ASSERT(result); -+ (void)result; -+ } -+ } - break; - } -- -- case DecimalBitWidth::DECIMAL_64: { -- auto src_ptr = ArrowBufferData(array, 1) + -- GetEffectiveOffset(array, NumericCast(parent_offset), scan_state, nested_offset); -- ConvertDecimal(src_ptr, vector, array, size, nested_offset, parent_offset, scan_state, val_mask, bit_width); -+ case PhysicalType::INT32: { -+ auto tgt_ptr = FlatVector::GetData(vector); -+ for (idx_t row = 0; row < size; row++) { -+ if (val_mask.RowIsValid(row)) { -+ auto result = Hugeint::TryCast(src_ptr[row], tgt_ptr[row]); -+ D_ASSERT(result); -+ (void)result; -+ } -+ } - break; - } -- -- case DecimalBitWidth::DECIMAL_128: { -- auto src_ptr = ArrowBufferData(array, 1) + -- GetEffectiveOffset(array, NumericCast(parent_offset), scan_state, nested_offset); -- ConvertDecimal(src_ptr, vector, array, size, nested_offset, parent_offset, scan_state, val_mask, bit_width); -+ case PhysicalType::INT64: { -+ auto tgt_ptr = FlatVector::GetData(vector); -+ for (idx_t row = 0; row < size; row++) { -+ if (val_mask.RowIsValid(row)) { -+ auto result = Hugeint::TryCast(src_ptr[row], tgt_ptr[row]); -+ D_ASSERT(result); -+ (void)result; -+ } -+ } -+ break; -+ } -+ case PhysicalType::INT128: { -+ FlatVector::SetData(vector, ArrowBufferData(array, 1) + -+ GetTypeIdSize(vector.GetType().InternalType()) * -+ GetEffectiveOffset(array, NumericCast(parent_offset), -+ scan_state, nested_offset)); - break; - } - default: -- throw NotImplementedException("Unsupported precision for Arrow Decimal Type."); -+ throw NotImplementedException("Unsupported physical type for Decimal: %s", -+ TypeIdToString(vector.GetType().InternalType())); - } - break; - } -diff --git a/src/include/duckdb/common/enum_util.hpp b/src/include/duckdb/common/enum_util.hpp -index 39a652962e..935102ef66 100644 ---- a/src/include/duckdb/common/enum_util.hpp -+++ b/src/include/duckdb/common/enum_util.hpp -@@ -126,8 +126,6 @@ enum class DebugInitialize : uint8_t; - - enum class DebugVectorVerification : uint8_t; - --enum class DecimalBitWidth : uint8_t; -- - enum class DefaultOrderByNullType : uint8_t; - - enum class DependencyEntryType : uint8_t; -@@ -546,9 +544,6 @@ const char* EnumUtil::ToChars(DebugInitialize value); - template<> - const char* EnumUtil::ToChars(DebugVectorVerification value); - --template<> --const char* EnumUtil::ToChars(DecimalBitWidth value); -- - template<> - const char* EnumUtil::ToChars(DefaultOrderByNullType value); - -@@ -1105,9 +1100,6 @@ DebugInitialize EnumUtil::FromString(const char *value); - template<> - DebugVectorVerification EnumUtil::FromString(const char *value); - --template<> --DecimalBitWidth EnumUtil::FromString(const char *value); -- - template<> - DefaultOrderByNullType EnumUtil::FromString(const char *value); - -diff --git a/src/include/duckdb/function/table/arrow/arrow_type_info.hpp b/src/include/duckdb/function/table/arrow/arrow_type_info.hpp -index d2e419646c..15e1aa3fce 100644 ---- a/src/include/duckdb/function/table/arrow/arrow_type_info.hpp -+++ b/src/include/duckdb/function/table/arrow/arrow_type_info.hpp -@@ -84,23 +84,6 @@ private: - ArrowDateTimeType size_type; - }; - --enum class DecimalBitWidth : uint8_t { DECIMAL_32, DECIMAL_64, DECIMAL_128, DECIMAL_256 }; -- --struct ArrowDecimalInfo final : public ArrowTypeInfo { --public: -- static constexpr const ArrowTypeInfoType TYPE = ArrowTypeInfoType::DECIMAL; -- --public: -- explicit ArrowDecimalInfo(DecimalBitWidth bit_width); -- ~ArrowDecimalInfo() override; -- --public: -- DecimalBitWidth GetBitWidth() const; -- --private: -- DecimalBitWidth bit_width; --}; -- - struct ArrowStringInfo : public ArrowTypeInfo { - public: - static constexpr const ArrowTypeInfoType TYPE = ArrowTypeInfoType::STRING; -diff --git a/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp b/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp -index 8a345b8c83..52b826c33a 100644 ---- a/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp -+++ b/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp -@@ -4,6 +4,6 @@ - - namespace duckdb { - --enum class ArrowTypeInfoType : uint8_t { LIST, STRUCT, DATE_TIME, STRING, ARRAY, DECIMAL }; -+enum class ArrowTypeInfoType : uint8_t { LIST, STRUCT, DATE_TIME, STRING, ARRAY }; - - } // namespace duckdb -diff --git a/tools/pythonpkg/src/arrow/arrow_array_stream.cpp b/tools/pythonpkg/src/arrow/arrow_array_stream.cpp -index c8e8cc0911..cee4f0b102 100644 ---- a/tools/pythonpkg/src/arrow/arrow_array_stream.cpp -+++ b/tools/pythonpkg/src/arrow/arrow_array_stream.cpp -@@ -282,23 +282,7 @@ py::object GetScalar(Value &constant, const string &timezone_config, const Arrow - case LogicalTypeId::BLOB: - return dataset_scalar(py::bytes(constant.GetValueUnsafe())); - case LogicalTypeId::DECIMAL: { -- py::object decimal_type; -- auto &datetime_info = type.GetTypeInfo(); -- auto bit_width = datetime_info.GetBitWidth(); -- switch (bit_width) { -- case DecimalBitWidth::DECIMAL_32: -- decimal_type = py::module_::import("pyarrow").attr("decimal32"); -- break; -- case DecimalBitWidth::DECIMAL_64: -- decimal_type = py::module_::import("pyarrow").attr("decimal64"); -- break; -- case DecimalBitWidth::DECIMAL_128: -- decimal_type = py::module_::import("pyarrow").attr("decimal128"); -- break; -- default: -- throw NotImplementedException("Unsupported precision for Arrow Decimal Type."); -- } -- -+ py::object decimal_type = py::module_::import("pyarrow").attr("decimal128"); - uint8_t width; - uint8_t scale; - constant.type().GetDecimalProperties(width, scale); -diff --git a/tools/pythonpkg/tests/fast/arrow/test_arrow_decimal_32_64.py b/tools/pythonpkg/tests/fast/arrow/test_arrow_decimal_32_64.py -deleted file mode 100644 -index b216ad0875..0000000000 ---- a/tools/pythonpkg/tests/fast/arrow/test_arrow_decimal_32_64.py -+++ /dev/null -@@ -1,64 +0,0 @@ --import duckdb --import pytest --from decimal import Decimal -- --pa = pytest.importorskip("pyarrow") -- -- --class TestArrowDecimalTypes(object): -- def test_decimal_32(self, duckdb_cursor): -- duckdb_cursor = duckdb.connect() -- decimal_32 = pa.Table.from_pylist( -- [ -- {"data": Decimal("100.20")}, -- {"data": Decimal("110.21")}, -- {"data": Decimal("31.20")}, -- {"data": Decimal("500.20")}, -- ], -- pa.schema([("data", pa.decimal32(5, 2))]), -- ) -- # Test scan -- assert duckdb_cursor.execute("FROM decimal_32").fetchall() == [ -- (Decimal('100.20'),), -- (Decimal('110.21'),), -- (Decimal('31.20'),), -- (Decimal('500.20'),), -- ] -- # Test filter pushdown -- assert duckdb_cursor.execute("SELECT COUNT(*) FROM decimal_32 where data > 100 and data < 200 ").fetchall() == [ -- (2,) -- ] -- -- # Test write -- arrow_table = duckdb_cursor.execute("FROM decimal_32").arrow() -- -- assert arrow_table.equals(decimal_32) -- -- def test_decimal_64(self, duckdb_cursor): -- duckdb_cursor = duckdb.connect() -- decimal_64 = pa.Table.from_pylist( -- [ -- {"data": Decimal("1000.231")}, -- {"data": Decimal("1100.231")}, -- {"data": Decimal("999999999999.231")}, -- {"data": Decimal("500.20")}, -- ], -- pa.schema([("data", pa.decimal64(16, 3))]), -- ) -- -- # Test scan -- assert duckdb_cursor.execute("FROM decimal_64").fetchall() == [ -- (Decimal('1000.231'),), -- (Decimal('1100.231'),), -- (Decimal('999999999999.231'),), -- (Decimal('500.200'),), -- ] -- -- # Test Filter pushdown -- assert duckdb_cursor.execute( -- "SELECT COUNT(*) FROM decimal_64 WHERE data > 1000 and data < 1200" -- ).fetchall() == [(2,)] -- -- # Test write -- arrow_table = duckdb_cursor.execute("FROM decimal_64").arrow() -- assert arrow_table.equals(decimal_64) -diff --git a/tools/pythonpkg/tests/fast/arrow/test_arrow_run_end_encoding.py b/tools/pythonpkg/tests/fast/arrow/test_arrow_run_end_encoding.py -index 4fed04ac24..fa27167458 100644 ---- a/tools/pythonpkg/tests/fast/arrow/test_arrow_run_end_encoding.py -+++ b/tools/pythonpkg/tests/fast/arrow/test_arrow_run_end_encoding.py -@@ -91,9 +91,9 @@ class TestArrowREE(object): - ('TIMESTAMP', "'1992-03-22 01:02:03'", "'2022-11-07 08:43:04.123456'"), - ('TIMESTAMP_MS', "'1992-03-22 01:02:03'", "'2022-11-07 08:43:04.123456'"), - ('TIMESTAMP_NS', "'1992-03-22 01:02:03'", "'2022-11-07 08:43:04.123456'"), -- # ('DECIMAL(4,2)', "'12.23'", "'99.99'"), REE not supported for decimal32 -- # ('DECIMAL(7,6)', "'1.234234'", "'0.000001'"), REE not supported for decimal32 -- # ('DECIMAL(14,7)', "'134523.234234'", "'999999.000001'"), REE not supported for decimal64 -+ ('DECIMAL(4,2)', "'12.23'", "'99.99'"), -+ ('DECIMAL(7,6)', "'1.234234'", "'0.000001'"), -+ ('DECIMAL(14,7)', "'134523.234234'", "'999999.000001'"), - ('DECIMAL(28,1)', "'12345678910111234123456789.1'", "'999999999999999999999999999.9'"), - ('UUID', "'10acd298-15d7-417c-8b59-eabb5a2bacab'", "'eeccb8c5-9943-b2bb-bb5e-222f4e14b687'"), - ('BIT', "'01010101010000'", "'01010100010101010101010101111111111'"), diff --git a/submodules/duckdb b/submodules/duckdb index 71c5c07cd..5b3ac8a8f 160000 --- a/submodules/duckdb +++ b/submodules/duckdb @@ -1 +1 @@ -Subproject commit 71c5c07cdd295e9409c0505885033ae9eb6b5ddd +Subproject commit 5b3ac8a8f6b6766d9ed5580bba616b0746c79c0b diff --git a/yarn.lock b/yarn.lock index 71e0d0e24..61b7f3002 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5113,10 +5113,10 @@ js-cookie@^3.0.5: resolved "https://registry.yarnpkg.com/js-cookie/-/js-cookie-3.0.5.tgz#0b7e2fd0c01552c58ba86e0841f94dc2557dcdbc" integrity sha512-cEiJEAEoIbWfCZYKWhVwFuvPX1gETRYPw6LlaTKoxD3s2AkXzkCjnp6h0V77ozyqj0jakteJ4YqDJT830+lVGw== -js-sha256@^0.11.0: - version "0.11.0" - resolved "https://registry.yarnpkg.com/js-sha256/-/js-sha256-0.11.0.tgz#256a921d9292f7fe98905face82e367abaca9576" - integrity sha512-6xNlKayMZvds9h1Y1VWc0fQHQ82BxTXizWPEtEeGvmOUYpBRy4gbWroHLpzowe6xiQhHpelCQiE7HEdznyBL9Q== +js-sha256@^0.11.1: + version "0.11.1" + resolved "https://registry.yarnpkg.com/js-sha256/-/js-sha256-0.11.1.tgz#712262e8fc9569d6f7f6eea72c0d8e5ccc7c976c" + integrity sha512-o6WSo/LUvY2uC4j7mO50a2ms7E/EAdbP0swigLV+nzHKTTaYnaLIWJ02VdXrsJX0vGedDESQnLsOekr94ryfjg== "js-tokens@^3.0.0 || ^4.0.0", js-tokens@^4.0.0: version "4.0.0"