Skip to content

Commit

Permalink
Hive partitioning: Fix preprocessing of CreateDirectories
Browse files Browse the repository at this point in the history
Fixes duckdblabs/duckdb-internal#588, improving on duckdb#9473.
The idea is to iterate over all global partitions instead of only the local ones.
  • Loading branch information
carlopi committed Nov 1, 2023
1 parent 182b824 commit 3962bb7
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 2 deletions.
8 changes: 8 additions & 0 deletions src/common/hive_partitioning.cpp
Expand Up @@ -316,6 +316,14 @@ std::map<idx_t, const HivePartitionKey *> HivePartitionedColumnData::GetReverseM
return ret;
}

std::map<idx_t, const HivePartitionKey *> GlobalHivePartitionState::GetReverseMap() {
std::map<idx_t, const HivePartitionKey *> ret;
for (const auto &pair : partition_map) {
ret[pair.second] = &(pair.first);
}
return ret;
}

void HivePartitionedColumnData::GrowAllocators() {
unique_lock<mutex> lck_gstate(allocators->lock);

Expand Down
5 changes: 3 additions & 2 deletions src/execution/operator/persistent/physical_copy_to_file.cpp
Expand Up @@ -123,8 +123,9 @@ SinkCombineResultType PhysicalCopyToFile::Combine(ExecutionContext &context, Ope
// create directories
lock_guard<mutex> global_lock(g.lock);
if (!g.created_directories) {
for (idx_t i = 0; i < partitions.size(); i++) {
CreateDirectories(partition_columns, names, partition_key_map[i]->values, trimmed_path, fs);
auto global_partition_key_map = g.partition_state->GetReverseMap();
for (auto &it : global_partition_key_map) {
CreateDirectories(partition_columns, names, it.second->values, trimmed_path, fs);
}
g.created_directories = true;
}
Expand Down
1 change: 1 addition & 0 deletions src/include/duckdb/common/hive_partitioning.hpp
Expand Up @@ -76,6 +76,7 @@ class GlobalHivePartitionState {
hive_partition_map_t partition_map;
//! Used for incrementally updating local copies of the partition map
vector<hive_partition_map_t::const_iterator> partitions;
std::map<idx_t, const HivePartitionKey *> GetReverseMap();
};

class HivePartitionedColumnData : public PartitionedColumnData {
Expand Down

0 comments on commit 3962bb7

Please sign in to comment.