Issue duckdb#1791: Aggregate Row Layout
Pull aggregate alignment into the aggregation operations
so that call sites no longer carry strange state
dependencies and replicated code.
Richard Wesley committed May 27, 2021
1 parent 1310af6 commit 3dee4f7
Showing 4 changed files with 16 additions and 9 deletions.
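
The gist of the change, sketched from the hunks below (DuckDB-internal types, so this is schematic rather than compilable on its own): callers used to align the row pointers to the aggregate section themselves before combining or finalizing; that alignment now happens inside the row operations.

    // Before: e.g. GroupedAggregateHashTable::Scan aligned the addresses itself,
    // then called the row operation (the deleted lines in aggregate_hashtable.cpp):
    VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), result.size());
    RowOperations::FinalizeStates(layout, addresses, result, group_cols);

    // After: the caller hands over row-start ("unaligned") addresses and
    // FinalizeStates applies layout.GetAggrOffset() internally:
    RowOperations::FinalizeStates(layout, addresses, result, group_cols);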
13 changes: 12 additions & 1 deletion src/common/row_operations/row_aggregate.cpp
@@ -41,7 +41,7 @@ void RowOperations::DestroyStates(RowLayout &layout, Vector &addresses, idx_t co
if (aggr.function.destructor) {
aggr.function.destructor(addresses, count);
}
// move to the next aggregate state
// Move to the next aggregate state
VectorOperations::AddInPlace(addresses, aggr.payload_size, count);
}
}
@@ -74,21 +74,32 @@ void RowOperations::CombineStates(RowLayout &layout, Vector &sources, Vector &ta
return;
}

// Move to the first aggregate states
VectorOperations::AddInPlace(sources, layout.GetAggrOffset(), count);
VectorOperations::AddInPlace(targets, layout.GetAggrOffset(), count);
for (auto &aggr : layout.GetAggregates()) {
D_ASSERT(aggr.function.combine);
aggr.function.combine(sources, targets, count);

// Move to the next aggregate states
VectorOperations::AddInPlace(sources, aggr.payload_size, count);
VectorOperations::AddInPlace(targets, aggr.payload_size, count);
}
}

void RowOperations::FinalizeStates(RowLayout &layout, Vector &addresses, DataChunk &result, idx_t aggr_idx) {
// Move to the first aggregate state
VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), result.size());

auto &aggregates = layout.GetAggregates();
for (idx_t i = 0; i < aggregates.size(); i++) {
auto &target = result.data[aggr_idx + i];
auto &aggr = aggregates[i];
aggr.function.finalize(addresses, aggr.bind_data, target, result.size());

// Move to the next aggregate state
VectorOperations::AddInPlace(addresses, aggr.payload_size, result.size());
}
}

} // namespace duckdb
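
For readers who do not have the row layout in their head: each row stores the group columns first, followed by the aggregate states, so walking the states means adding GetAggrOffset() once and then each aggregate's payload_size. A minimal standalone sketch of that arithmetic (the sizes and names are invented for illustration; DuckDB does the same thing vectorized over a whole Vector of row pointers via VectorOperations::AddInPlace):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        // Pretend layout: [group columns: 16 bytes | state 0: 8 bytes | state 1: 24 bytes]
        const std::size_t aggr_offset = 16;          // stands in for layout.GetAggrOffset()
        const std::size_t payload_sizes[] = {8, 24}; // stands in for aggr.payload_size
        std::vector<uint8_t> row(16 + 8 + 24);

        uint8_t *ptr = row.data();                   // "unaligned": points at the row start
        ptr += aggr_offset;                          // move to the first aggregate state
        for (std::size_t size : payload_sizes) {
            std::printf("aggregate state at byte offset %td\n", ptr - row.data());
            ptr += size;                             // move to the next aggregate state
        }
        return 0;
    }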
7 changes: 2 additions & 5 deletions src/execution/aggregate_hashtable.cpp
@@ -278,6 +278,7 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashe

Vector addresses(LogicalType::POINTER);
auto new_group_count = FindOrCreateGroups(groups, group_hashes, addresses, new_groups);
VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), payload.size());

// now every cell has an entry
// update the aggregates
@@ -734,9 +735,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
sel_vector = &no_match_vector;
remaining_entries = no_match_count;
}
// pointers in addresses now were moved behind the groups by CompareGroups/ScatterGroups but we may have to add
// padding still to point at the payload.
VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), groups.size());

return new_group_count;
}

@@ -779,7 +778,6 @@ void GroupedAggregateHashTable::FlushMove(Vector &source_addresses, Vector &sour
auto &column = groups.data[i];
VectorOperations::Gather::Set(source_addresses, column, count, offsets[i], i);
}
VectorOperations::AddInPlace(source_addresses, layout.GetAggrOffset(), count);

SelectionVector new_groups(STANDARD_VECTOR_SIZE);
Vector group_addresses(LogicalType::POINTER);
@@ -906,7 +904,6 @@ idx_t GroupedAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) {
auto &column = result.data[i];
VectorOperations::Gather::Set(addresses, column, result.size(), offsets[i], i);
}
VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), result.size());

RowOperations::FinalizeStates(layout, addresses, result, group_cols);

1 change: 0 additions & 1 deletion src/execution/perfect_aggregate_hashtable.cpp
@@ -140,7 +140,6 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
Vector target_addresses(LogicalType::POINTER);
auto source_addresses_ptr = FlatVector::GetData<data_ptr_t>(source_addresses);
auto target_addresses_ptr = FlatVector::GetData<data_ptr_t>(target_addresses);
auto reinit_addresses_ptr = FlatVector::GetData<data_ptr_t>(addresses);

// iterate over all entries of both hash tables and call combine for all entries that can be combined
data_ptr_t source_ptr = other.data;
4 changes: 2 additions & 2 deletions src/include/duckdb/common/row_operations/row_operations.hpp
@@ -31,9 +31,9 @@ struct RowOperations {
static void UpdateStates(AggregateObject &aggr, Vector &addresses, DataChunk &payload, idx_t arg_idx, idx_t count);
//! filtered update - aligned addresses
static void UpdateFilteredStates(AggregateObject &aggr, Vector &addresses, DataChunk &payload, idx_t arg_idx);
//! combine - aligned addresses, updated
//! combine - unaligned addresses, updated
static void CombineStates(RowLayout &layout, Vector &sources, Vector &targets, idx_t count);
//! finalize - aligned addresses, updated
//! finalize - unaligned addresses, updated
static void FinalizeStates(RowLayout &layout, Vector &addresses, DataChunk &result, idx_t aggr_idx);
};
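
The comment changes above are the contract shift in a nutshell: the update entry points keep expecting aligned addresses (already pointing at the first aggregate state), while combine and finalize now accept unaligned, row-start addresses and apply the offset themselves. Schematically (DuckDB-internal names; the pairing with specific call sites is illustrative, not lifted verbatim from this diff):

    // Aligned contract: the caller advances the pointers first, as AddChunk now
    // does with the line added in aggregate_hashtable.cpp above:
    VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), payload.size());
    RowOperations::UpdateStates(aggr, addresses, payload, arg_idx, count);

    // Unaligned contract: the pointers still reference the start of each row;
    // the aggregate offset is added inside the call:
    RowOperations::FinalizeStates(layout, addresses, result, aggr_idx);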
