diff --git a/src/duckdb/extension/json/include/json_multi_file_info.hpp b/src/duckdb/extension/json/include/json_multi_file_info.hpp index 821036410..061a6a459 100644 --- a/src/duckdb/extension/json/include/json_multi_file_info.hpp +++ b/src/duckdb/extension/json/include/json_multi_file_info.hpp @@ -19,8 +19,7 @@ class JSONFileReaderOptions : public BaseFileReaderOptions { }; struct JSONMultiFileInfo : MultiFileReaderInterface { - static unique_ptr InitializeInterface(ClientContext &context, MultiFileReader &reader, - MultiFileList &file_list); + static unique_ptr CreateInterface(ClientContext &context); unique_ptr InitializeOptions(ClientContext &context, optional_ptr info) override; @@ -49,6 +48,7 @@ struct JSONMultiFileInfo : MultiFileReaderInterface { void FinishReading(ClientContext &context, GlobalTableFunctionState &global_state, LocalTableFunctionState &local_state) override; unique_ptr GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override; + FileGlobInput GetGlobInput() override; }; } // namespace duckdb diff --git a/src/duckdb/extension/json/json_multi_file_info.cpp b/src/duckdb/extension/json/json_multi_file_info.cpp index 70f94ba5d..7771af489 100644 --- a/src/duckdb/extension/json/json_multi_file_info.cpp +++ b/src/duckdb/extension/json/json_multi_file_info.cpp @@ -4,8 +4,7 @@ namespace duckdb { -unique_ptr -JSONMultiFileInfo::InitializeInterface(ClientContext &context, MultiFileReader &reader, MultiFileList &file_list) { +unique_ptr JSONMultiFileInfo::CreateInterface(ClientContext &context) { return make_uniq(); } @@ -579,4 +578,8 @@ optional_idx JSONMultiFileInfo::MaxThreads(const MultiFileBindData &bind_data, c return json_data.max_threads; } +FileGlobInput JSONMultiFileInfo::GetGlobInput() { + return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "json"); +} + } // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_multi_file_info.hpp b/src/duckdb/extension/parquet/include/parquet_multi_file_info.hpp index 0245c710e..814e6f63d 100644 --- a/src/duckdb/extension/parquet/include/parquet_multi_file_info.hpp +++ b/src/duckdb/extension/parquet/include/parquet_multi_file_info.hpp @@ -24,8 +24,7 @@ class ParquetFileReaderOptions : public BaseFileReaderOptions { }; struct ParquetMultiFileInfo : MultiFileReaderInterface { - static unique_ptr InitializeInterface(ClientContext &context, MultiFileReader &reader, - MultiFileList &file_list); + static unique_ptr CreateInterface(ClientContext &context); unique_ptr InitializeOptions(ClientContext &context, optional_ptr info) override; @@ -56,6 +55,7 @@ struct ParquetMultiFileInfo : MultiFileReaderInterface { unique_ptr GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override; void GetVirtualColumns(ClientContext &context, MultiFileBindData &bind_data, virtual_column_map_t &result) override; unique_ptr Copy() override; + FileGlobInput GetGlobInput() override; }; class ParquetScanFunction { diff --git a/src/duckdb/extension/parquet/parquet_multi_file_info.cpp b/src/duckdb/extension/parquet/parquet_multi_file_info.cpp index 227222b8a..9617f0c83 100644 --- a/src/duckdb/extension/parquet/parquet_multi_file_info.cpp +++ b/src/duckdb/extension/parquet/parquet_multi_file_info.cpp @@ -131,8 +131,7 @@ static void BindSchema(ClientContext &context, vector &return_types D_ASSERT(names.size() == return_types.size()); } -unique_ptr -ParquetMultiFileInfo::InitializeInterface(ClientContext &context, MultiFileReader &reader, MultiFileList &file_list) { +unique_ptr ParquetMultiFileInfo::CreateInterface(ClientContext &context) { return make_uniq(); } @@ -244,10 +243,10 @@ static unique_ptr ParquetScanDeserialize(Deserializer &deserialize for (auto &path : files) { file_path.emplace_back(path); } + FileGlobInput input(FileGlobOptions::FALLBACK_GLOB, "parquet"); auto multi_file_reader = MultiFileReader::Create(function); - auto file_list = multi_file_reader->CreateFileList(context, Value::LIST(LogicalType::VARCHAR, file_path), - FileGlobOptions::DISALLOW_EMPTY); + auto file_list = multi_file_reader->CreateFileList(context, Value::LIST(LogicalType::VARCHAR, file_path), input); auto parquet_options = make_uniq(std::move(serialization.parquet_options)); auto interface = make_uniq(); auto bind_data = MultiFileFunction::MultiFileBindInternal( @@ -588,4 +587,8 @@ unique_ptr ParquetMultiFileInfo::Copy() { return make_uniq(); } +FileGlobInput ParquetMultiFileInfo::GetGlobInput() { + return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "parquet"); +} + } // namespace duckdb diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index e61ef38d8..b0526cabb 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -1849,19 +1849,20 @@ FileExpandResult EnumUtil::FromString(const char *value) { const StringUtil::EnumStringLiteral *GetFileGlobOptionsValues() { static constexpr StringUtil::EnumStringLiteral values[] { { static_cast(FileGlobOptions::DISALLOW_EMPTY), "DISALLOW_EMPTY" }, - { static_cast(FileGlobOptions::ALLOW_EMPTY), "ALLOW_EMPTY" } + { static_cast(FileGlobOptions::ALLOW_EMPTY), "ALLOW_EMPTY" }, + { static_cast(FileGlobOptions::FALLBACK_GLOB), "FALLBACK_GLOB" } }; return values; } template<> const char* EnumUtil::ToChars(FileGlobOptions value) { - return StringUtil::EnumToString(GetFileGlobOptionsValues(), 2, "FileGlobOptions", static_cast(value)); + return StringUtil::EnumToString(GetFileGlobOptionsValues(), 3, "FileGlobOptions", static_cast(value)); } template<> FileGlobOptions EnumUtil::FromString(const char *value) { - return static_cast(StringUtil::StringToEnum(GetFileGlobOptionsValues(), 2, "FileGlobOptions", value)); + return static_cast(StringUtil::StringToEnum(GetFileGlobOptionsValues(), 3, "FileGlobOptions", value)); } const StringUtil::EnumStringLiteral *GetFileLockTypeValues() { diff --git a/src/duckdb/src/common/file_system.cpp b/src/duckdb/src/common/file_system.cpp index 16085d206..42687bac8 100644 --- a/src/duckdb/src/common/file_system.cpp +++ b/src/duckdb/src/common/file_system.cpp @@ -333,6 +333,17 @@ string FileSystem::ExtractBaseName(const string &path) { return vec[0]; } +string FileSystem::ExtractExtension(const string &path) { + if (path.empty()) { + return string(); + } + auto vec = StringUtil::Split(ExtractName(path), "."); + if (vec.size() < 2) { + return string(); + } + return vec.back(); +} + string FileSystem::GetHomeDirectory(optional_ptr opener) { // read the home_directory setting first, if it is set if (opener) { @@ -626,7 +637,7 @@ static string LookupExtensionForPattern(const string &pattern) { return ""; } -vector FileSystem::GlobFiles(const string &pattern, ClientContext &context, FileGlobOptions options) { +vector FileSystem::GlobFiles(const string &pattern, ClientContext &context, const FileGlobInput &input) { auto result = Glob(pattern); if (result.empty()) { string required_extension = LookupExtensionForPattern(pattern); @@ -648,9 +659,19 @@ vector FileSystem::GlobFiles(const string &pattern, ClientContext throw InternalException("Extension load \"%s\" did not throw but somehow the extension was not loaded", required_extension); } - return GlobFiles(pattern, context, options); + return GlobFiles(pattern, context, input); + } + if (input.behavior == FileGlobOptions::FALLBACK_GLOB && !HasGlob(pattern)) { + // if we have no glob in the pattern and we have an extension, we try to glob + if (!HasGlob(pattern)) { + if (input.extension.empty()) { + throw InternalException("FALLBACK_GLOB requires an extension to be specified"); + } + string new_pattern = JoinPath(JoinPath(pattern, "**"), "*." + input.extension); + return GlobFiles(new_pattern, context, FileGlobOptions::DISALLOW_EMPTY); + } } - if (options == FileGlobOptions::DISALLOW_EMPTY) { + if (input.behavior == FileGlobOptions::FALLBACK_GLOB || input.behavior == FileGlobOptions::DISALLOW_EMPTY) { throw IOException("No files found that match the pattern \"%s\"", pattern); } } diff --git a/src/duckdb/src/common/multi_file/multi_file_function.cpp b/src/duckdb/src/common/multi_file/multi_file_function.cpp index e0a8cabcf..33f1fea55 100644 --- a/src/duckdb/src/common/multi_file/multi_file_function.cpp +++ b/src/duckdb/src/common/multi_file/multi_file_function.cpp @@ -6,6 +6,10 @@ namespace duckdb { MultiFileReaderInterface::~MultiFileReaderInterface() { } +void MultiFileReaderInterface::InitializeInterface(ClientContext &context, MultiFileReader &reader, + MultiFileList &file_list) { +} + void MultiFileReaderInterface::FinalizeCopyBind(ClientContext &context, BaseFileReaderOptions &options, const vector &expected_names, const vector &expected_types) { @@ -41,4 +45,8 @@ unique_ptr MultiFileReaderInterface::Copy() { throw InternalException("MultiFileReaderInterface::Copy is not implemented for this file interface"); } +FileGlobInput MultiFileReaderInterface::GetGlobInput() { + return FileGlobOptions::DISALLOW_EMPTY; +} + } // namespace duckdb diff --git a/src/duckdb/src/common/multi_file/multi_file_list.cpp b/src/duckdb/src/common/multi_file/multi_file_list.cpp index 52567f544..97e6addac 100644 --- a/src/duckdb/src/common/multi_file/multi_file_list.cpp +++ b/src/duckdb/src/common/multi_file/multi_file_list.cpp @@ -136,8 +136,12 @@ const OpenFileInfo &MultiFileListIterationHelper::MultiFileListIterator::operato //===--------------------------------------------------------------------===// // MultiFileList //===--------------------------------------------------------------------===// +MultiFileList::MultiFileList(vector paths, FileGlobInput glob_input_p) + : paths(std::move(paths)), glob_input(std::move(glob_input_p)) { +} + MultiFileList::MultiFileList(vector paths, FileGlobOptions options) - : paths(std::move(paths)), glob_options(options) { + : MultiFileList(std::move(paths), FileGlobInput(options)) { } MultiFileList::~MultiFileList() { @@ -270,8 +274,8 @@ idx_t SimpleMultiFileList::GetTotalFileCount() { //===--------------------------------------------------------------------===// // GlobMultiFileList //===--------------------------------------------------------------------===// -GlobMultiFileList::GlobMultiFileList(ClientContext &context_p, vector paths_p, FileGlobOptions options) - : MultiFileList(std::move(paths_p), options), context(context_p), current_path(0) { +GlobMultiFileList::GlobMultiFileList(ClientContext &context_p, vector paths_p, FileGlobInput glob_input) + : MultiFileList(std::move(paths_p), std::move(glob_input)), context(context_p), current_path(0) { } unique_ptr GlobMultiFileList::ComplexFilterPushdown(ClientContext &context_p, @@ -369,7 +373,7 @@ bool GlobMultiFileList::ExpandPathInternal(idx_t ¤t_path, vector MultiFileReader::ParsePaths(const Value &input) { } shared_ptr MultiFileReader::CreateFileList(ClientContext &context, const vector &paths, - FileGlobOptions options) { + const FileGlobInput &glob_input) { vector open_files; for (auto &path : paths) { open_files.emplace_back(path); } - auto res = make_uniq(context, std::move(open_files), options); - if (res->GetExpandResult() == FileExpandResult::NO_FILES && options == FileGlobOptions::DISALLOW_EMPTY) { + auto res = make_uniq(context, std::move(open_files), glob_input); + if (res->GetExpandResult() == FileExpandResult::NO_FILES && glob_input.behavior != FileGlobOptions::ALLOW_EMPTY) { throw IOException("%s needs at least one file to read", function_name); } return std::move(res); } shared_ptr MultiFileReader::CreateFileList(ClientContext &context, const Value &input, - FileGlobOptions options) { + const FileGlobInput &glob_input) { auto paths = ParsePaths(input); - return CreateFileList(context, paths, options); + return CreateFileList(context, paths, glob_input); } bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFileOptions &options, @@ -645,6 +645,10 @@ void MultiFileReader::PruneReaders(MultiFileBindData &data, MultiFileList &file_ } } +FileGlobInput MultiFileReader::GetGlobInput(MultiFileReaderInterface &interface) { + return interface.GetGlobInput(); +} + HivePartitioningIndex::HivePartitioningIndex(string value_p, idx_t index) : value(std::move(value_p)), index(index) { } diff --git a/src/duckdb/src/common/radix_partitioning.cpp b/src/duckdb/src/common/radix_partitioning.cpp index 4b19a8819..487e106af 100644 --- a/src/duckdb/src/common/radix_partitioning.cpp +++ b/src/duckdb/src/common/radix_partitioning.cpp @@ -51,9 +51,9 @@ RETURN_TYPE RadixBitsSwitch(const idx_t radix_bits, ARGS &&... args) { case 10: return OP::template Operation<10>(std::forward(args)...); case 11: - return OP::template Operation<10>(std::forward(args)...); + return OP::template Operation<11>(std::forward(args)...); case 12: - return OP::template Operation<10>(std::forward(args)...); + return OP::template Operation<12>(std::forward(args)...); default: throw InternalException( "radix_bits higher than RadixPartitioning::MAX_RADIX_BITS encountered in RadixBitsSwitch"); diff --git a/src/duckdb/src/common/types/timestamp.cpp b/src/duckdb/src/common/types/timestamp.cpp index 1e1e80ce7..7a069c004 100644 --- a/src/duckdb/src/common/types/timestamp.cpp +++ b/src/duckdb/src/common/types/timestamp.cpp @@ -429,8 +429,8 @@ void Timestamp::Convert(timestamp_ns_t input, date_t &out_date, dtime_t &out_tim timestamp_t Timestamp::GetCurrentTimestamp() { auto now = system_clock::now(); - auto epoch_ms = duration_cast(now.time_since_epoch()).count(); - return Timestamp::FromEpochMs(epoch_ms); + auto epoch_micros = duration_cast(now.time_since_epoch()).count(); + return FromEpochMicroSeconds(epoch_micros); } timestamp_t Timestamp::FromEpochSecondsPossiblyInfinite(int64_t sec) { diff --git a/src/duckdb/src/common/types/vector.cpp b/src/duckdb/src/common/types/vector.cpp index cad5da707..afb1e1341 100644 --- a/src/duckdb/src/common/types/vector.cpp +++ b/src/duckdb/src/common/types/vector.cpp @@ -135,7 +135,8 @@ void Vector::Reference(const Value &value) { void Vector::Reference(const Vector &other) { if (other.GetType().id() != GetType().id()) { - throw InternalException("Vector::Reference used on vector of different type"); + throw InternalException("Vector::Reference used on vector of different type (source %s referenced %s)", + GetType(), other.GetType()); } D_ASSERT(other.GetType() == GetType()); Reinterpret(other); diff --git a/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_multi_file_info.cpp b/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_multi_file_info.cpp index d5d413d9d..260ac35d4 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_multi_file_info.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_multi_file_info.cpp @@ -8,8 +8,7 @@ namespace duckdb { -unique_ptr -CSVMultiFileInfo::InitializeInterface(ClientContext &context, MultiFileReader &reader, MultiFileList &file_list) { +unique_ptr CSVMultiFileInfo::CreateInterface(ClientContext &context) { return make_uniq(); } @@ -417,4 +416,8 @@ double CSVFileScan::GetProgressInFile(ClientContext &context) { return file_progress * 100.0; } +FileGlobInput CSVMultiFileInfo::GetGlobInput() { + return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "csv"); +} + } // namespace duckdb diff --git a/src/duckdb/src/execution/physical_plan/plan_merge_into.cpp b/src/duckdb/src/execution/physical_plan/plan_merge_into.cpp index c4d06b0a6..8eb077922 100644 --- a/src/duckdb/src/execution/physical_plan/plan_merge_into.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_merge_into.cpp @@ -119,7 +119,7 @@ PhysicalOperator &DuckCatalog::PlanMergeInto(ClientContext &context, PhysicalPla PhysicalOperator &Catalog::PlanMergeInto(ClientContext &context, PhysicalPlanGenerator &planner, LogicalMergeInto &op, PhysicalOperator &plan) { - throw NotImplementedException("Database type \"%s\" does not support MERGE INTO or ON CONFLICT", GetName()); + throw NotImplementedException("Database type \"%s\" does not support MERGE INTO or ON CONFLICT", GetCatalogType()); } PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalMergeInto &op) { diff --git a/src/duckdb/src/function/table/copy_csv.cpp b/src/duckdb/src/function/table/copy_csv.cpp index 806257c1f..381422c53 100644 --- a/src/duckdb/src/function/table/copy_csv.cpp +++ b/src/duckdb/src/function/table/copy_csv.cpp @@ -379,7 +379,7 @@ bool WriteCSVRotateFiles(FunctionData &, const optional_idx &file_size_bytes) { bool WriteCSVRotateNextFile(GlobalFunctionData &gstate, FunctionData &, const optional_idx &file_size_bytes) { auto &global_state = gstate.Cast(); - return global_state.FileSize() > file_size_bytes.GetIndex(); + return file_size_bytes.IsValid() && global_state.FileSize() > file_size_bytes.GetIndex(); } void CSVCopyFunction::RegisterFunction(BuiltinFunctions &set) { diff --git a/src/duckdb/src/function/table/direct_file_reader.cpp b/src/duckdb/src/function/table/direct_file_reader.cpp index 131beaca3..8aa6aba35 100644 --- a/src/duckdb/src/function/table/direct_file_reader.cpp +++ b/src/duckdb/src/function/table/direct_file_reader.cpp @@ -5,7 +5,7 @@ namespace duckdb { DirectFileReader::DirectFileReader(OpenFileInfo file_p, const LogicalType &type) - : BaseFileReader(std::move(file_p)), type(type), done(false) { + : BaseFileReader(std::move(file_p)), done(false), type(type) { columns.push_back(MultiFileColumnDefinition("filename", LogicalType::VARCHAR)); columns.push_back(MultiFileColumnDefinition("content", type)); columns.push_back(MultiFileColumnDefinition("size", LogicalType::BIGINT)); diff --git a/src/duckdb/src/function/table/read_file.cpp b/src/duckdb/src/function/table/read_file.cpp index d7e375047..d0481cc23 100644 --- a/src/duckdb/src/function/table/read_file.cpp +++ b/src/duckdb/src/function/table/read_file.cpp @@ -15,9 +15,7 @@ namespace duckdb { //------------------------------------------------------------------------------ template -unique_ptr DirectMultiFileInfo::InitializeInterface(ClientContext &context, - MultiFileReader &reader, - MultiFileList &file_list) { +unique_ptr DirectMultiFileInfo::CreateInterface(ClientContext &context) { return make_uniq(); }; @@ -126,14 +124,19 @@ unique_ptr DirectMultiFileInfo::GetCardinality(const MultiFi result->has_estimated_cardinality = true; result->estimated_cardinality = bind_data.file_list->GetTotalFileCount(); return result; -}; +} + +template +FileGlobInput DirectMultiFileInfo::GetGlobInput() { + return FileGlobOptions::ALLOW_EMPTY; +} //------------------------------------------------------------------------------ // Register //------------------------------------------------------------------------------ template static TableFunction GetFunction() { - MultiFileFunction, DirectFileGlobOptions> table_function(OP::NAME); + MultiFileFunction> table_function(OP::NAME); return table_function; } diff --git a/src/duckdb/src/function/table/system/duckdb_log.cpp b/src/duckdb/src/function/table/system/duckdb_log.cpp index 2be0531c9..f84cb405a 100644 --- a/src/duckdb/src/function/table/system/duckdb_log.cpp +++ b/src/duckdb/src/function/table/system/duckdb_log.cpp @@ -28,7 +28,7 @@ static unique_ptr DuckDBLogBind(ClientContext &context, TableFunct return_types.emplace_back(LogicalType::UBIGINT); names.emplace_back("timestamp"); - return_types.emplace_back(LogicalType::TIMESTAMP); + return_types.emplace_back(LogicalType::TIMESTAMP_TZ); names.emplace_back("type"); return_types.emplace_back(LogicalType::VARCHAR); diff --git a/src/duckdb/src/function/table/table_scan.cpp b/src/duckdb/src/function/table/table_scan.cpp index 5fea970c0..9e2755e7e 100644 --- a/src/duckdb/src/function/table/table_scan.cpp +++ b/src/duckdb/src/function/table/table_scan.cpp @@ -2,15 +2,19 @@ #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/catalog/dependency_list.hpp" +#include "duckdb/common/enums/expression_type.hpp" #include "duckdb/common/mutex.hpp" #include "duckdb/common/serializer/deserializer.hpp" #include "duckdb/common/serializer/serializer.hpp" +#include "duckdb/common/typedefs.hpp" +#include "duckdb/common/unique_ptr.hpp" #include "duckdb/execution/index/art/art.hpp" #include "duckdb/function/function_set.hpp" #include "duckdb/main/attached_database.hpp" #include "duckdb/main/client_config.hpp" -#include "duckdb/optimizer/matcher/expression_matcher.hpp" -#include "duckdb/planner/expression/bound_between_expression.hpp" +#include "duckdb/planner/expression.hpp" +#include "duckdb/planner/expression/bound_columnref_expression.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/storage/data_table.hpp" #include "duckdb/storage/table/scan_state.hpp" @@ -493,6 +497,35 @@ bool TryScanIndex(ART &art, const ColumnList &column_list, TableFunctionInitInpu return false; } + // Resolve bound column references in the index_expr against the current input projection + column_t updated_index_column; + bool found_index_column_in_input = false; + + // Find the indexed column amongst the input columns + for (idx_t i = 0; i < input.column_ids.size(); ++i) { + if (input.column_ids[i] == indexed_columns[0]) { + updated_index_column = i; + found_index_column_in_input = true; + break; + } + } + + // If found, update the bound column ref within index_expr + if (found_index_column_in_input) { + ExpressionIterator::EnumerateExpression(index_expr, [&](Expression &expr) { + if (expr.GetExpressionClass() != ExpressionClass::BOUND_COLUMN_REF) { + return; + } + + auto &bound_column_ref_expr = expr.Cast(); + + // If the bound column references the index column, use updated_index_column + if (bound_column_ref_expr.binding.column_index == indexed_columns[0]) { + bound_column_ref_expr.binding.column_index = updated_index_column; + } + }); + } + // Get ART column. auto &col = column_list.GetColumn(LogicalIndex(indexed_columns[0])); diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 9d0254963..26567cbf1 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "0-dev3437" +#define DUCKDB_PATCH_VERSION "0-dev3480" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 4 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.4.0-dev3437" +#define DUCKDB_VERSION "v1.4.0-dev3480" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "3073352fee" +#define DUCKDB_SOURCE_ID "ecde6b6514" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/chrono.hpp b/src/duckdb/src/include/duckdb/common/chrono.hpp index 5b3733d37..8083e5e09 100644 --- a/src/duckdb/src/include/duckdb/common/chrono.hpp +++ b/src/duckdb/src/include/duckdb/common/chrono.hpp @@ -14,6 +14,7 @@ namespace duckdb { using std::chrono::duration; using std::chrono::duration_cast; using std::chrono::high_resolution_clock; +using std::chrono::microseconds; using std::chrono::milliseconds; using std::chrono::nanoseconds; using std::chrono::steady_clock; diff --git a/src/duckdb/src/include/duckdb/common/enums/file_glob_options.hpp b/src/duckdb/src/include/duckdb/common/enums/file_glob_options.hpp index 94f528d59..bd6e58f22 100644 --- a/src/duckdb/src/include/duckdb/common/enums/file_glob_options.hpp +++ b/src/duckdb/src/include/duckdb/common/enums/file_glob_options.hpp @@ -12,9 +12,17 @@ namespace duckdb { -enum class FileGlobOptions : uint8_t { - DISALLOW_EMPTY = 0, - ALLOW_EMPTY = 1, +enum class FileGlobOptions : uint8_t { DISALLOW_EMPTY = 0, ALLOW_EMPTY = 1, FALLBACK_GLOB = 2 }; + +struct FileGlobInput { + FileGlobInput(FileGlobOptions options) // NOLINT: allow implicit conversion from FileGlobOptions + : behavior(options) { + } + FileGlobInput(FileGlobOptions options, string extension_p) : behavior(options), extension(std::move(extension_p)) { + } + + FileGlobOptions behavior; + string extension; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/common/file_system.hpp b/src/duckdb/src/include/duckdb/common/file_system.hpp index b8e05bc4b..4e95f5ce5 100644 --- a/src/duckdb/src/include/duckdb/common/file_system.hpp +++ b/src/duckdb/src/include/duckdb/common/file_system.hpp @@ -221,6 +221,8 @@ class FileSystem { DUCKDB_API string ConvertSeparators(const string &path); //! Extract the base name of a file (e.g. if the input is lib/example.dll the base name is 'example') DUCKDB_API string ExtractBaseName(const string &path); + //! Extract the extension of a file (e.g. if the input is lib/example.dll the extension is 'dll') + DUCKDB_API string ExtractExtension(const string &path); //! Extract the name of a file (e.g if the input is lib/example.dll the name is 'example.dll') DUCKDB_API string ExtractName(const string &path); @@ -232,7 +234,7 @@ class FileSystem { //! Runs a glob on the file system, returning a list of matching files DUCKDB_API virtual vector Glob(const string &path, FileOpener *opener = nullptr); DUCKDB_API vector GlobFiles(const string &path, ClientContext &context, - FileGlobOptions options = FileGlobOptions::DISALLOW_EMPTY); + const FileGlobInput &input = FileGlobOptions::DISALLOW_EMPTY); //! registers a sub-file system to handle certain file name prefixes, e.g. http:// etc. DUCKDB_API virtual void RegisterSubSystem(unique_ptr sub_fs); diff --git a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_function.hpp b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_function.hpp index e34418e0c..ab17f7b87 100644 --- a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_function.hpp +++ b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_function.hpp @@ -20,6 +20,7 @@ namespace duckdb { struct MultiFileReaderInterface { virtual ~MultiFileReaderInterface(); + virtual void InitializeInterface(ClientContext &context, MultiFileReader &reader, MultiFileList &file_list); virtual unique_ptr InitializeOptions(ClientContext &context, optional_ptr info) = 0; virtual bool ParseCopyOption(ClientContext &context, const string &key, const vector &values, @@ -55,15 +56,10 @@ struct MultiFileReaderInterface { virtual unique_ptr GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) = 0; virtual void GetVirtualColumns(ClientContext &context, MultiFileBindData &bind_data, virtual_column_map_t &result); virtual unique_ptr Copy(); + virtual FileGlobInput GetGlobInput(); }; -struct DefaultFileGlobOptions { - static FileGlobOptions GetFileGlobOptions() { - return FileGlobOptions::DISALLOW_EMPTY; - } -}; - -template +template class MultiFileFunction : public TableFunction { public: explicit MultiFileFunction(string name_p) @@ -165,10 +161,13 @@ class MultiFileFunction : public TableFunction { static unique_ptr MultiFileBind(ClientContext &context, TableFunctionBindInput &input, vector &return_types, vector &names) { + auto interface = OP::CreateInterface(context); auto multi_file_reader = MultiFileReader::Create(input.table_function); - auto file_list = multi_file_reader->CreateFileList(context, input.inputs[0], OPTIONS::GetFileGlobOptions()); - auto interface = OP::InitializeInterface(context, *multi_file_reader, *file_list); + auto glob_input = multi_file_reader->GetGlobInput(*interface); + auto file_list = multi_file_reader->CreateFileList(context, input.inputs[0], glob_input); + + interface->InitializeInterface(context, *multi_file_reader, *file_list); MultiFileOptions file_options; @@ -190,11 +189,13 @@ class MultiFileFunction : public TableFunction { static unique_ptr MultiFileBindCopy(ClientContext &context, CopyFromFunctionBindInput &input, vector &expected_names, vector &expected_types) { + auto interface = OP::CreateInterface(context); auto multi_file_reader = MultiFileReader::CreateDefault("COPY"); vector paths = {input.info.file_path}; - auto file_list = multi_file_reader->CreateFileList(context, paths); + auto glob_input = multi_file_reader->GetGlobInput(*interface); + auto file_list = multi_file_reader->CreateFileList(context, paths, glob_input); - auto interface = OP::InitializeInterface(context, *multi_file_reader, *file_list); + interface->InitializeInterface(context, *multi_file_reader, *file_list); auto options = interface->InitializeOptions(context, nullptr); MultiFileOptions file_options; diff --git a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_list.hpp b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_list.hpp index 0359c0a99..ded5bad05 100644 --- a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_list.hpp +++ b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_list.hpp @@ -69,7 +69,8 @@ struct MultiFilePushdownInfo { //! NOTE: subclasses are responsible for ensuring thread-safety class MultiFileList { public: - explicit MultiFileList(vector paths, FileGlobOptions options); + MultiFileList(vector paths, FileGlobOptions options); + MultiFileList(vector paths, FileGlobInput input); virtual ~MultiFileList(); //! Returns the raw, unexpanded paths, pre-filter @@ -114,7 +115,7 @@ class MultiFileList { //! The unexpanded input paths const vector paths; //! Whether paths can expand to 0 files - const FileGlobOptions glob_options; + const FileGlobInput glob_input; public: template @@ -157,7 +158,7 @@ class SimpleMultiFileList : public MultiFileList { //! MultiFileList that takes a list of paths and produces a list of files with all globs expanded class GlobMultiFileList : public MultiFileList { public: - GlobMultiFileList(ClientContext &context, vector paths, FileGlobOptions options); + GlobMultiFileList(ClientContext &context, vector paths, FileGlobInput glob_input); //! Calls ExpandAll, then prunes the expanded_files using the hive/filename filters unique_ptr ComplexFilterPushdown(ClientContext &context, const MultiFileOptions &options, MultiFilePushdownInfo &info, diff --git a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_reader.hpp b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_reader.hpp index 6cbb77fb6..5f45e4297 100644 --- a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_reader.hpp +++ b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_reader.hpp @@ -67,10 +67,11 @@ struct MultiFileReader { //! Create a MultiFileList from a vector of paths. Any globs will be expanded using the default filesystem DUCKDB_API virtual shared_ptr CreateFileList(ClientContext &context, const vector &paths, - FileGlobOptions options = FileGlobOptions::DISALLOW_EMPTY); + const FileGlobInput &glob_input = FileGlobOptions::DISALLOW_EMPTY); //! Shorthand for ParsePaths + CreateFileList - DUCKDB_API shared_ptr CreateFileList(ClientContext &context, const Value &input, - FileGlobOptions options = FileGlobOptions::DISALLOW_EMPTY); + DUCKDB_API shared_ptr + CreateFileList(ClientContext &context, const Value &input, + const FileGlobInput &glob_input = FileGlobOptions::DISALLOW_EMPTY); //! Parse the named parameters of a multi-file reader DUCKDB_API virtual bool ParseOption(const string &key, const Value &val, MultiFileOptions &options, @@ -186,6 +187,8 @@ struct MultiFileReader { DUCKDB_API virtual unique_ptr Copy() const; + DUCKDB_API virtual FileGlobInput GetGlobInput(MultiFileReaderInterface &interface); + protected: //! Used in errors to report which function is using this MultiFileReader string function_name; diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_multi_file_info.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_multi_file_info.hpp index 8dea8b265..50d046fb0 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_multi_file_info.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_multi_file_info.hpp @@ -31,9 +31,8 @@ struct CSVSchemaDiscovery { MultiFileList &multi_file_list); }; -struct CSVMultiFileInfo : public MultiFileReaderInterface { - static unique_ptr InitializeInterface(ClientContext &context, MultiFileReader &reader, - MultiFileList &file_list); +struct CSVMultiFileInfo : MultiFileReaderInterface { + static unique_ptr CreateInterface(ClientContext &context); unique_ptr InitializeOptions(ClientContext &context, optional_ptr info) override; @@ -65,6 +64,7 @@ struct CSVMultiFileInfo : public MultiFileReaderInterface { void FinishReading(ClientContext &context, GlobalTableFunctionState &global_state, LocalTableFunctionState &local_state) override; unique_ptr GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override; + FileGlobInput GetGlobInput() override; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/function/table/read_file.hpp b/src/duckdb/src/include/duckdb/function/table/read_file.hpp index 4f12d9922..966fea5ef 100644 --- a/src/duckdb/src/include/duckdb/function/table/read_file.hpp +++ b/src/duckdb/src/include/duckdb/function/table/read_file.hpp @@ -51,8 +51,7 @@ struct ReadTextOperation { template struct DirectMultiFileInfo : MultiFileReaderInterface { - static unique_ptr InitializeInterface(ClientContext &context, MultiFileReader &reader, - MultiFileList &file_list); + static unique_ptr CreateInterface(ClientContext &context); unique_ptr InitializeOptions(ClientContext &context, optional_ptr info) override; bool ParseCopyOption(ClientContext &context, const string &key, const vector &values, @@ -78,12 +77,7 @@ struct DirectMultiFileInfo : MultiFileReaderInterface { BaseFileReaderOptions &options, const MultiFileOptions &file_options) override; unique_ptr GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override; -}; - -struct DirectFileGlobOptions { - static FileGlobOptions GetFileGlobOptions() { - return FileGlobOptions::ALLOW_EMPTY; - } + FileGlobInput GetGlobInput() override; }; } // namespace duckdb diff --git a/src/duckdb/src/logging/log_storage.cpp b/src/duckdb/src/logging/log_storage.cpp index 30196d6c2..c6733d968 100644 --- a/src/duckdb/src/logging/log_storage.cpp +++ b/src/duckdb/src/logging/log_storage.cpp @@ -518,7 +518,8 @@ unique_ptr FileLogStorage::BindReplace(ClientContext &context, TableFu string columns; if (table == LoggingTargetTable::LOG_ENTRIES) { - columns = "'context_id': 'UBIGINT', 'timestamp': 'TIMESTAMP', 'type': 'VARCHAR', 'log_level': 'VARCHAR' , " + columns = "'context_id': 'UBIGINT', 'timestamp': 'TIMESTAMP WITH TIME ZONE', 'type': 'VARCHAR', 'log_level': " + "'VARCHAR' , " "'message': 'VARCHAR'"; } else if (table == LoggingTargetTable::LOG_CONTEXTS) { columns = "'context_id': 'UBIGINT', 'scope': 'VARCHAR', 'connection_id': 'UBIGINT', 'transaction_id': " @@ -527,7 +528,8 @@ unique_ptr FileLogStorage::BindReplace(ClientContext &context, TableFu select = "SELECT context_id, scope, connection_id, transaction_id, query_id, thread_id, timestamp, type, " "log_level, message "; columns = "'context_id': 'UBIGINT', 'scope': 'VARCHAR', 'connection_id': 'UBIGINT', 'transaction_id': " - "'UBIGINT', 'query_id': 'UBIGINT', 'thread_id': 'UBIGINT', 'timestamp': 'TIMESTAMP', 'type': " + "'UBIGINT', 'query_id': 'UBIGINT', 'thread_id': 'UBIGINT', 'timestamp': 'TIMESTAMP WITH TIME ZONE', " + "'type': " "'VARCHAR', 'log_level': 'VARCHAR' , 'message': 'VARCHAR'"; } else { throw InternalException("Invalid logging target table"); diff --git a/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp b/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp index d5baa1c8a..a00399aa3 100644 --- a/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +++ b/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp @@ -127,7 +127,6 @@ BindResult ExpressionBinder::BindExpression(OperatorExpression &op, idx_t depth) if (const_exp.value.TryCastAs(context, LogicalType::UINTEGER)) { // Array extraction: if the cast fails it's definitely out-of-bounds for a JSON array auto index = UIntegerValue::Get(const_exp.value); - index -= index > 0; // Subtract 1 for SQL 1-based indexing (except when accessing from back) const_exp.value = StringUtil::Format("$[%lld]", index); const_exp.return_type = LogicalType::VARCHAR; } else if (const_exp.return_type.id() == LogicalType::VARCHAR) { diff --git a/src/duckdb/src/planner/operator/logical_copy_to_file.cpp b/src/duckdb/src/planner/operator/logical_copy_to_file.cpp index 1227d39a1..beee4f121 100644 --- a/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +++ b/src/duckdb/src/planner/operator/logical_copy_to_file.cpp @@ -69,6 +69,7 @@ void LogicalCopyToFile::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(217, "write_empty_file", write_empty_file, true); serializer.WritePropertyWithDefault(218, "preserve_order", preserve_order, PreserveOrderType::AUTOMATIC); serializer.WritePropertyWithDefault(219, "hive_file_pattern", hive_file_pattern, true); + serializer.WritePropertyWithDefault(220, "file_size_bytes", file_size_bytes, optional_idx()); } unique_ptr LogicalCopyToFile::Deserialize(Deserializer &deserializer) { @@ -117,6 +118,7 @@ unique_ptr LogicalCopyToFile::Deserialize(Deserializer &deseria auto preserve_order = deserializer.ReadPropertyWithExplicitDefault(218, "preserve_order", PreserveOrderType::AUTOMATIC); auto hive_file_pattern = deserializer.ReadPropertyWithExplicitDefault(219, "hive_file_pattern", true); + auto file_size_bytes = deserializer.ReadPropertyWithExplicitDefault(220, "file_size_bytes", optional_idx()); if (!has_serialize) { // If not serialized, re-bind with the copy info @@ -147,6 +149,7 @@ unique_ptr LogicalCopyToFile::Deserialize(Deserializer &deseria result->write_empty_file = write_empty_file; result->preserve_order = preserve_order; result->hive_file_pattern = hive_file_pattern; + result->file_size_bytes = file_size_bytes; return std::move(result); }