From 3dee4f79a0bc86dd703ba6a2fcd1bb9f9ea2c7f5 Mon Sep 17 00:00:00 2001 From: Richard Wesley Date: Thu, 27 May 2021 12:44:28 -0700 Subject: [PATCH] Issue #1791: Aggregate Row Layout Pull aggregate alignment into the aggregation operations so that there are no longer these strange state dependencies and code replication. --- src/common/row_operations/row_aggregate.cpp | 13 ++++++++++++- src/execution/aggregate_hashtable.cpp | 7 ++----- src/execution/perfect_aggregate_hashtable.cpp | 1 - .../duckdb/common/row_operations/row_operations.hpp | 4 ++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/common/row_operations/row_aggregate.cpp b/src/common/row_operations/row_aggregate.cpp index 18a52ea1438..7682faacaa7 100644 --- a/src/common/row_operations/row_aggregate.cpp +++ b/src/common/row_operations/row_aggregate.cpp @@ -41,7 +41,7 @@ void RowOperations::DestroyStates(RowLayout &layout, Vector &addresses, idx_t co if (aggr.function.destructor) { aggr.function.destructor(addresses, count); } - // move to the next aggregate state + // Move to the next aggregate state VectorOperations::AddInPlace(addresses, aggr.payload_size, count); } } @@ -74,21 +74,32 @@ void RowOperations::CombineStates(RowLayout &layout, Vector &sources, Vector &ta return; } + // Move to the first aggregate states + VectorOperations::AddInPlace(sources, layout.GetAggrOffset(), count); + VectorOperations::AddInPlace(targets, layout.GetAggrOffset(), count); for (auto &aggr : layout.GetAggregates()) { D_ASSERT(aggr.function.combine); aggr.function.combine(sources, targets, count); + + // Move to the next aggregate states VectorOperations::AddInPlace(sources, aggr.payload_size, count); VectorOperations::AddInPlace(targets, aggr.payload_size, count); } } void RowOperations::FinalizeStates(RowLayout &layout, Vector &addresses, DataChunk &result, idx_t aggr_idx) { + // Move to the first aggregate state + VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), result.size()); + auto &aggregates = layout.GetAggregates(); for (idx_t i = 0; i < aggregates.size(); i++) { auto &target = result.data[aggr_idx + i]; auto &aggr = aggregates[i]; aggr.function.finalize(addresses, aggr.bind_data, target, result.size()); + + // Move to the next aggregate state VectorOperations::AddInPlace(addresses, aggr.payload_size, result.size()); } } + } // namespace duckdb diff --git a/src/execution/aggregate_hashtable.cpp b/src/execution/aggregate_hashtable.cpp index e8d3205563c..04ff94ad7c0 100644 --- a/src/execution/aggregate_hashtable.cpp +++ b/src/execution/aggregate_hashtable.cpp @@ -278,6 +278,7 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashe Vector addresses(LogicalType::POINTER); auto new_group_count = FindOrCreateGroups(groups, group_hashes, addresses, new_groups); + VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), payload.size()); // now every cell has an entry // update the aggregates @@ -734,9 +735,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V sel_vector = &no_match_vector; remaining_entries = no_match_count; } - // pointers in addresses now were moved behind the groups by CompareGroups/ScatterGroups but we may have to add - // padding still to point at the payload. - VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), groups.size()); + return new_group_count; } @@ -779,7 +778,6 @@ void GroupedAggregateHashTable::FlushMove(Vector &source_addresses, Vector &sour auto &column = groups.data[i]; VectorOperations::Gather::Set(source_addresses, column, count, offsets[i], i); } - VectorOperations::AddInPlace(source_addresses, layout.GetAggrOffset(), count); SelectionVector new_groups(STANDARD_VECTOR_SIZE); Vector group_addresses(LogicalType::POINTER); @@ -906,7 +904,6 @@ idx_t GroupedAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) { auto &column = result.data[i]; VectorOperations::Gather::Set(addresses, column, result.size(), offsets[i], i); } - VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), result.size()); RowOperations::FinalizeStates(layout, addresses, result, group_cols); diff --git a/src/execution/perfect_aggregate_hashtable.cpp b/src/execution/perfect_aggregate_hashtable.cpp index 2420468cbba..fad08ea7f98 100644 --- a/src/execution/perfect_aggregate_hashtable.cpp +++ b/src/execution/perfect_aggregate_hashtable.cpp @@ -140,7 +140,6 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) { Vector target_addresses(LogicalType::POINTER); auto source_addresses_ptr = FlatVector::GetData(source_addresses); auto target_addresses_ptr = FlatVector::GetData(target_addresses); - auto reinit_addresses_ptr = FlatVector::GetData(addresses); // iterate over all entries of both hash tables and call combine for all entries that can be combined data_ptr_t source_ptr = other.data; diff --git a/src/include/duckdb/common/row_operations/row_operations.hpp b/src/include/duckdb/common/row_operations/row_operations.hpp index 9cd5ce1062c..c4cfed89b58 100644 --- a/src/include/duckdb/common/row_operations/row_operations.hpp +++ b/src/include/duckdb/common/row_operations/row_operations.hpp @@ -31,9 +31,9 @@ struct RowOperations { static void UpdateStates(AggregateObject &aggr, Vector &addresses, DataChunk &payload, idx_t arg_idx, idx_t count); //! filtered update - aligned addresses static void UpdateFilteredStates(AggregateObject &aggr, Vector &addresses, DataChunk &payload, idx_t arg_idx); - //! combine - aligned addresses, updated + //! combine - unaligned addresses, updated static void CombineStates(RowLayout &layout, Vector &sources, Vector &targets, idx_t count); - //! finalize - aligned addresses, updated + //! finalize - unaligned addresses, updated static void FinalizeStates(RowLayout &layout, Vector &addresses, DataChunk &result, idx_t aggr_idx); };