Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/duckdb/extension/json/include/json_multi_file_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ class JSONFileReaderOptions : public BaseFileReaderOptions {
};

struct JSONMultiFileInfo : MultiFileReaderInterface {
static unique_ptr<MultiFileReaderInterface> InitializeInterface(ClientContext &context, MultiFileReader &reader,
MultiFileList &file_list);
static unique_ptr<MultiFileReaderInterface> CreateInterface(ClientContext &context);

unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
optional_ptr<TableFunctionInfo> info) override;
Expand Down Expand Up @@ -49,6 +48,7 @@ struct JSONMultiFileInfo : MultiFileReaderInterface {
void FinishReading(ClientContext &context, GlobalTableFunctionState &global_state,
LocalTableFunctionState &local_state) override;
unique_ptr<NodeStatistics> GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override;
FileGlobInput GetGlobInput() override;
};

} // namespace duckdb
7 changes: 5 additions & 2 deletions src/duckdb/extension/json/json_multi_file_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

namespace duckdb {

unique_ptr<MultiFileReaderInterface>
JSONMultiFileInfo::InitializeInterface(ClientContext &context, MultiFileReader &reader, MultiFileList &file_list) {
unique_ptr<MultiFileReaderInterface> JSONMultiFileInfo::CreateInterface(ClientContext &context) {
return make_uniq<JSONMultiFileInfo>();
}

Expand Down Expand Up @@ -579,4 +578,8 @@ optional_idx JSONMultiFileInfo::MaxThreads(const MultiFileBindData &bind_data, c
return json_data.max_threads;
}

FileGlobInput JSONMultiFileInfo::GetGlobInput() {
return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "json");
}

} // namespace duckdb
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ class ParquetFileReaderOptions : public BaseFileReaderOptions {
};

struct ParquetMultiFileInfo : MultiFileReaderInterface {
static unique_ptr<MultiFileReaderInterface> InitializeInterface(ClientContext &context, MultiFileReader &reader,
MultiFileList &file_list);
static unique_ptr<MultiFileReaderInterface> CreateInterface(ClientContext &context);

unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
optional_ptr<TableFunctionInfo> info) override;
Expand Down Expand Up @@ -56,6 +55,7 @@ struct ParquetMultiFileInfo : MultiFileReaderInterface {
unique_ptr<NodeStatistics> GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override;
void GetVirtualColumns(ClientContext &context, MultiFileBindData &bind_data, virtual_column_map_t &result) override;
unique_ptr<MultiFileReaderInterface> Copy() override;
FileGlobInput GetGlobInput() override;
};

class ParquetScanFunction {
Expand Down
11 changes: 7 additions & 4 deletions src/duckdb/extension/parquet/parquet_multi_file_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,7 @@ static void BindSchema(ClientContext &context, vector<LogicalType> &return_types
D_ASSERT(names.size() == return_types.size());
}

unique_ptr<MultiFileReaderInterface>
ParquetMultiFileInfo::InitializeInterface(ClientContext &context, MultiFileReader &reader, MultiFileList &file_list) {
unique_ptr<MultiFileReaderInterface> ParquetMultiFileInfo::CreateInterface(ClientContext &context) {
return make_uniq<ParquetMultiFileInfo>();
}

Expand Down Expand Up @@ -244,10 +243,10 @@ static unique_ptr<FunctionData> ParquetScanDeserialize(Deserializer &deserialize
for (auto &path : files) {
file_path.emplace_back(path);
}
FileGlobInput input(FileGlobOptions::FALLBACK_GLOB, "parquet");

auto multi_file_reader = MultiFileReader::Create(function);
auto file_list = multi_file_reader->CreateFileList(context, Value::LIST(LogicalType::VARCHAR, file_path),
FileGlobOptions::DISALLOW_EMPTY);
auto file_list = multi_file_reader->CreateFileList(context, Value::LIST(LogicalType::VARCHAR, file_path), input);
auto parquet_options = make_uniq<ParquetFileReaderOptions>(std::move(serialization.parquet_options));
auto interface = make_uniq<ParquetMultiFileInfo>();
auto bind_data = MultiFileFunction<ParquetMultiFileInfo>::MultiFileBindInternal(
Expand Down Expand Up @@ -588,4 +587,8 @@ unique_ptr<MultiFileReaderInterface> ParquetMultiFileInfo::Copy() {
return make_uniq<ParquetMultiFileInfo>();
}

FileGlobInput ParquetMultiFileInfo::GetGlobInput() {
return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "parquet");
}

} // namespace duckdb
7 changes: 4 additions & 3 deletions src/duckdb/src/common/enum_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1849,19 +1849,20 @@ FileExpandResult EnumUtil::FromString<FileExpandResult>(const char *value) {
const StringUtil::EnumStringLiteral *GetFileGlobOptionsValues() {
static constexpr StringUtil::EnumStringLiteral values[] {
{ static_cast<uint32_t>(FileGlobOptions::DISALLOW_EMPTY), "DISALLOW_EMPTY" },
{ static_cast<uint32_t>(FileGlobOptions::ALLOW_EMPTY), "ALLOW_EMPTY" }
{ static_cast<uint32_t>(FileGlobOptions::ALLOW_EMPTY), "ALLOW_EMPTY" },
{ static_cast<uint32_t>(FileGlobOptions::FALLBACK_GLOB), "FALLBACK_GLOB" }
};
return values;
}

template<>
const char* EnumUtil::ToChars<FileGlobOptions>(FileGlobOptions value) {
return StringUtil::EnumToString(GetFileGlobOptionsValues(), 2, "FileGlobOptions", static_cast<uint32_t>(value));
return StringUtil::EnumToString(GetFileGlobOptionsValues(), 3, "FileGlobOptions", static_cast<uint32_t>(value));
}

template<>
FileGlobOptions EnumUtil::FromString<FileGlobOptions>(const char *value) {
return static_cast<FileGlobOptions>(StringUtil::StringToEnum(GetFileGlobOptionsValues(), 2, "FileGlobOptions", value));
return static_cast<FileGlobOptions>(StringUtil::StringToEnum(GetFileGlobOptionsValues(), 3, "FileGlobOptions", value));
}

const StringUtil::EnumStringLiteral *GetFileLockTypeValues() {
Expand Down
27 changes: 24 additions & 3 deletions src/duckdb/src/common/file_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,17 @@ string FileSystem::ExtractBaseName(const string &path) {
return vec[0];
}

string FileSystem::ExtractExtension(const string &path) {
if (path.empty()) {
return string();
}
auto vec = StringUtil::Split(ExtractName(path), ".");
if (vec.size() < 2) {
return string();
}
return vec.back();
}

string FileSystem::GetHomeDirectory(optional_ptr<FileOpener> opener) {
// read the home_directory setting first, if it is set
if (opener) {
Expand Down Expand Up @@ -626,7 +637,7 @@ static string LookupExtensionForPattern(const string &pattern) {
return "";
}

vector<OpenFileInfo> FileSystem::GlobFiles(const string &pattern, ClientContext &context, FileGlobOptions options) {
vector<OpenFileInfo> FileSystem::GlobFiles(const string &pattern, ClientContext &context, const FileGlobInput &input) {
auto result = Glob(pattern);
if (result.empty()) {
string required_extension = LookupExtensionForPattern(pattern);
Expand All @@ -648,9 +659,19 @@ vector<OpenFileInfo> FileSystem::GlobFiles(const string &pattern, ClientContext
throw InternalException("Extension load \"%s\" did not throw but somehow the extension was not loaded",
required_extension);
}
return GlobFiles(pattern, context, options);
return GlobFiles(pattern, context, input);
}
if (input.behavior == FileGlobOptions::FALLBACK_GLOB && !HasGlob(pattern)) {
// if we have no glob in the pattern and we have an extension, we try to glob
if (!HasGlob(pattern)) {
if (input.extension.empty()) {
throw InternalException("FALLBACK_GLOB requires an extension to be specified");
}
string new_pattern = JoinPath(JoinPath(pattern, "**"), "*." + input.extension);
return GlobFiles(new_pattern, context, FileGlobOptions::DISALLOW_EMPTY);
}
}
if (options == FileGlobOptions::DISALLOW_EMPTY) {
if (input.behavior == FileGlobOptions::FALLBACK_GLOB || input.behavior == FileGlobOptions::DISALLOW_EMPTY) {
throw IOException("No files found that match the pattern \"%s\"", pattern);
}
}
Expand Down
8 changes: 8 additions & 0 deletions src/duckdb/src/common/multi_file/multi_file_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ namespace duckdb {
MultiFileReaderInterface::~MultiFileReaderInterface() {
}

void MultiFileReaderInterface::InitializeInterface(ClientContext &context, MultiFileReader &reader,
MultiFileList &file_list) {
}

void MultiFileReaderInterface::FinalizeCopyBind(ClientContext &context, BaseFileReaderOptions &options,
const vector<string> &expected_names,
const vector<LogicalType> &expected_types) {
Expand Down Expand Up @@ -41,4 +45,8 @@ unique_ptr<MultiFileReaderInterface> MultiFileReaderInterface::Copy() {
throw InternalException("MultiFileReaderInterface::Copy is not implemented for this file interface");
}

FileGlobInput MultiFileReaderInterface::GetGlobInput() {
return FileGlobOptions::DISALLOW_EMPTY;
}

} // namespace duckdb
12 changes: 8 additions & 4 deletions src/duckdb/src/common/multi_file/multi_file_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,12 @@ const OpenFileInfo &MultiFileListIterationHelper::MultiFileListIterator::operato
//===--------------------------------------------------------------------===//
// MultiFileList
//===--------------------------------------------------------------------===//
MultiFileList::MultiFileList(vector<OpenFileInfo> paths, FileGlobInput glob_input_p)
: paths(std::move(paths)), glob_input(std::move(glob_input_p)) {
}

MultiFileList::MultiFileList(vector<OpenFileInfo> paths, FileGlobOptions options)
: paths(std::move(paths)), glob_options(options) {
: MultiFileList(std::move(paths), FileGlobInput(options)) {
}

MultiFileList::~MultiFileList() {
Expand Down Expand Up @@ -270,8 +274,8 @@ idx_t SimpleMultiFileList::GetTotalFileCount() {
//===--------------------------------------------------------------------===//
// GlobMultiFileList
//===--------------------------------------------------------------------===//
GlobMultiFileList::GlobMultiFileList(ClientContext &context_p, vector<OpenFileInfo> paths_p, FileGlobOptions options)
: MultiFileList(std::move(paths_p), options), context(context_p), current_path(0) {
GlobMultiFileList::GlobMultiFileList(ClientContext &context_p, vector<OpenFileInfo> paths_p, FileGlobInput glob_input)
: MultiFileList(std::move(paths_p), std::move(glob_input)), context(context_p), current_path(0) {
}

unique_ptr<MultiFileList> GlobMultiFileList::ComplexFilterPushdown(ClientContext &context_p,
Expand Down Expand Up @@ -369,7 +373,7 @@ bool GlobMultiFileList::ExpandPathInternal(idx_t &current_path, vector<OpenFileI
}

auto &fs = FileSystem::GetFileSystem(context);
auto glob_files = fs.GlobFiles(paths[current_path].path, context, glob_options);
auto glob_files = fs.GlobFiles(paths[current_path].path, context, glob_input);
std::sort(glob_files.begin(), glob_files.end());
result.insert(result.end(), glob_files.begin(), glob_files.end());

Expand Down
14 changes: 9 additions & 5 deletions src/duckdb/src/common/multi_file/multi_file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,22 +116,22 @@ vector<string> MultiFileReader::ParsePaths(const Value &input) {
}

shared_ptr<MultiFileList> MultiFileReader::CreateFileList(ClientContext &context, const vector<string> &paths,
FileGlobOptions options) {
const FileGlobInput &glob_input) {
vector<OpenFileInfo> open_files;
for (auto &path : paths) {
open_files.emplace_back(path);
}
auto res = make_uniq<GlobMultiFileList>(context, std::move(open_files), options);
if (res->GetExpandResult() == FileExpandResult::NO_FILES && options == FileGlobOptions::DISALLOW_EMPTY) {
auto res = make_uniq<GlobMultiFileList>(context, std::move(open_files), glob_input);
if (res->GetExpandResult() == FileExpandResult::NO_FILES && glob_input.behavior != FileGlobOptions::ALLOW_EMPTY) {
throw IOException("%s needs at least one file to read", function_name);
}
return std::move(res);
}

shared_ptr<MultiFileList> MultiFileReader::CreateFileList(ClientContext &context, const Value &input,
FileGlobOptions options) {
const FileGlobInput &glob_input) {
auto paths = ParsePaths(input);
return CreateFileList(context, paths, options);
return CreateFileList(context, paths, glob_input);
}

bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFileOptions &options,
Expand Down Expand Up @@ -645,6 +645,10 @@ void MultiFileReader::PruneReaders(MultiFileBindData &data, MultiFileList &file_
}
}

FileGlobInput MultiFileReader::GetGlobInput(MultiFileReaderInterface &interface) {
return interface.GetGlobInput();
}

HivePartitioningIndex::HivePartitioningIndex(string value_p, idx_t index) : value(std::move(value_p)), index(index) {
}

Expand Down
4 changes: 2 additions & 2 deletions src/duckdb/src/common/radix_partitioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ RETURN_TYPE RadixBitsSwitch(const idx_t radix_bits, ARGS &&... args) {
case 10:
return OP::template Operation<10>(std::forward<ARGS>(args)...);
case 11:
return OP::template Operation<10>(std::forward<ARGS>(args)...);
return OP::template Operation<11>(std::forward<ARGS>(args)...);
case 12:
return OP::template Operation<10>(std::forward<ARGS>(args)...);
return OP::template Operation<12>(std::forward<ARGS>(args)...);
default:
throw InternalException(
"radix_bits higher than RadixPartitioning::MAX_RADIX_BITS encountered in RadixBitsSwitch");
Expand Down
4 changes: 2 additions & 2 deletions src/duckdb/src/common/types/timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,8 @@ void Timestamp::Convert(timestamp_ns_t input, date_t &out_date, dtime_t &out_tim

timestamp_t Timestamp::GetCurrentTimestamp() {
auto now = system_clock::now();
auto epoch_ms = duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
return Timestamp::FromEpochMs(epoch_ms);
auto epoch_micros = duration_cast<microseconds>(now.time_since_epoch()).count();
return FromEpochMicroSeconds(epoch_micros);
}

timestamp_t Timestamp::FromEpochSecondsPossiblyInfinite(int64_t sec) {
Expand Down
3 changes: 2 additions & 1 deletion src/duckdb/src/common/types/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ void Vector::Reference(const Value &value) {

void Vector::Reference(const Vector &other) {
if (other.GetType().id() != GetType().id()) {
throw InternalException("Vector::Reference used on vector of different type");
throw InternalException("Vector::Reference used on vector of different type (source %s referenced %s)",
GetType(), other.GetType());
}
D_ASSERT(other.GetType() == GetType());
Reinterpret(other);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@

namespace duckdb {

unique_ptr<MultiFileReaderInterface>
CSVMultiFileInfo::InitializeInterface(ClientContext &context, MultiFileReader &reader, MultiFileList &file_list) {
unique_ptr<MultiFileReaderInterface> CSVMultiFileInfo::CreateInterface(ClientContext &context) {
return make_uniq<CSVMultiFileInfo>();
}

Expand Down Expand Up @@ -417,4 +416,8 @@ double CSVFileScan::GetProgressInFile(ClientContext &context) {
return file_progress * 100.0;
}

FileGlobInput CSVMultiFileInfo::GetGlobInput() {
return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "csv");
}

} // namespace duckdb
2 changes: 1 addition & 1 deletion src/duckdb/src/execution/physical_plan/plan_merge_into.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ PhysicalOperator &DuckCatalog::PlanMergeInto(ClientContext &context, PhysicalPla

PhysicalOperator &Catalog::PlanMergeInto(ClientContext &context, PhysicalPlanGenerator &planner, LogicalMergeInto &op,
PhysicalOperator &plan) {
throw NotImplementedException("Database type \"%s\" does not support MERGE INTO or ON CONFLICT", GetName());
throw NotImplementedException("Database type \"%s\" does not support MERGE INTO or ON CONFLICT", GetCatalogType());
}

PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalMergeInto &op) {
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/function/table/copy_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ bool WriteCSVRotateFiles(FunctionData &, const optional_idx &file_size_bytes) {

bool WriteCSVRotateNextFile(GlobalFunctionData &gstate, FunctionData &, const optional_idx &file_size_bytes) {
auto &global_state = gstate.Cast<GlobalWriteCSVData>();
return global_state.FileSize() > file_size_bytes.GetIndex();
return file_size_bytes.IsValid() && global_state.FileSize() > file_size_bytes.GetIndex();
}

void CSVCopyFunction::RegisterFunction(BuiltinFunctions &set) {
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/function/table/direct_file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace duckdb {

DirectFileReader::DirectFileReader(OpenFileInfo file_p, const LogicalType &type)
: BaseFileReader(std::move(file_p)), type(type), done(false) {
: BaseFileReader(std::move(file_p)), done(false), type(type) {
columns.push_back(MultiFileColumnDefinition("filename", LogicalType::VARCHAR));
columns.push_back(MultiFileColumnDefinition("content", type));
columns.push_back(MultiFileColumnDefinition("size", LogicalType::BIGINT));
Expand Down
13 changes: 8 additions & 5 deletions src/duckdb/src/function/table/read_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ namespace duckdb {
//------------------------------------------------------------------------------

template <class OP>
unique_ptr<MultiFileReaderInterface> DirectMultiFileInfo<OP>::InitializeInterface(ClientContext &context,
MultiFileReader &reader,
MultiFileList &file_list) {
unique_ptr<MultiFileReaderInterface> DirectMultiFileInfo<OP>::CreateInterface(ClientContext &context) {
return make_uniq<DirectMultiFileInfo>();
};

Expand Down Expand Up @@ -126,14 +124,19 @@ unique_ptr<NodeStatistics> DirectMultiFileInfo<OP>::GetCardinality(const MultiFi
result->has_estimated_cardinality = true;
result->estimated_cardinality = bind_data.file_list->GetTotalFileCount();
return result;
};
}

template <class OP>
FileGlobInput DirectMultiFileInfo<OP>::GetGlobInput() {
return FileGlobOptions::ALLOW_EMPTY;
}

//------------------------------------------------------------------------------
// Register
//------------------------------------------------------------------------------
template <class OP>
static TableFunction GetFunction() {
MultiFileFunction<DirectMultiFileInfo<OP>, DirectFileGlobOptions> table_function(OP::NAME);
MultiFileFunction<DirectMultiFileInfo<OP>> table_function(OP::NAME);
return table_function;
}

Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/function/table/system/duckdb_log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static unique_ptr<FunctionData> DuckDBLogBind(ClientContext &context, TableFunct
return_types.emplace_back(LogicalType::UBIGINT);

names.emplace_back("timestamp");
return_types.emplace_back(LogicalType::TIMESTAMP);
return_types.emplace_back(LogicalType::TIMESTAMP_TZ);

names.emplace_back("type");
return_types.emplace_back(LogicalType::VARCHAR);
Expand Down
Loading
Loading