Skip to content
Permalink
Browse files
adjust topn heap operation when string is dictionary encoded, but not…
… uniquely (#12291)

* add topn heap optimization when string is dictionary encoded, but not uniquely

* use array instead

* is same

* fix javadoc

* fix

* Update StringTopNColumnAggregatesProcessor.java
  • Loading branch information
clintropolis committed Mar 8, 2022
1 parent 0e097ea commit dae53ae36a357919698cb8bb3e90934ee64b9fcf
Showing 1 changed file with 20 additions and 14 deletions.
@@ -52,9 +52,8 @@ public StringTopNColumnAggregatesProcessor(final ColumnCapabilities capabilities
@Override
public int getCardinality(DimensionSelector selector)
{
// only report the underlying selector cardinality if the column the selector is for is dictionary encoded, and
// the dictionary values are unique, that is they have a 1:1 mapping between dictionaryId and column value
if (capabilities.isDictionaryEncoded().and(capabilities.areDictionaryValuesUnique()).isTrue()) {
// only report the underlying selector cardinality if the column the selector is for is dictionary encoded
if (capabilities.isDictionaryEncoded().isTrue()) {
return selector.getValueCardinality();
}
return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
@@ -117,17 +116,11 @@ public long scanAndAggregate(
)
{
final boolean notUnknown = selector.getValueCardinality() != DimensionDictionarySelector.CARDINALITY_UNKNOWN;
final boolean unique = capabilities.isDictionaryEncoded().and(capabilities.areDictionaryValuesUnique()).isTrue();
// we must know cardinality to use array based aggregation
// we check for uniquely dictionary encoded values because non-unique (meaning dictionary ids do not have a 1:1
// relation with values) negates many of the benefits of array aggregation:
// - if different dictionary ids map to the same value but dictionary ids are unique to that value (*:1), then
// array aggregation will be correct but will still have to potentially perform many map lookups and lose the
// performance benefit array aggregation is trying to provide
// - in cases where the same dictionary ids map to different values (1:* or *:*), results can be entirely
// incorrect since an aggregator for a different value might be chosen from the array based on the re-used
// dictionary id
if (notUnknown && unique) {
final boolean hasDictionary = capabilities.isDictionaryEncoded().isTrue();
// we must know cardinality to use array based aggregation. in cases where the same dictionary ids map to different
// values (1:* or *:*), results can be entirely incorrect since an aggregator for a different value might be
// chosen from the array based on the re-used dictionary id
if (notUnknown && hasDictionary) {
return scanAndAggregateWithCardinalityKnown(query, cursor, selector, rowSelector);
} else {
return scanAndAggregateWithCardinalityUnknown(query, cursor, selector);
@@ -140,6 +133,11 @@ public void initAggregateStore()
this.aggregatesStore = new HashMap<>();
}

/**
* scan and aggregate when column is dictionary encoded and value cardinality is known up front, so values are
* aggregated into an array position specified by the dictionaryid, which if not already present, are translated
* into the key and fetched (or created if they key hasn't been encountered) from the {@link #aggregatesStore}
*/
private long scanAndAggregateWithCardinalityKnown(
TopNQuery query,
Cursor cursor,
@@ -172,6 +170,14 @@ private long scanAndAggregateWithCardinalityKnown(
return processedRows;
}

/**
* this method is to allow scan and aggregate when values are not dictionary encoded
* (e.g. {@link DimensionSelector#nameLookupPossibleInAdvance()} is false and/or when
* {@link ColumnCapabilities#isDictionaryEncoded()} is false). This mode also uses hash table aggregation, storing
* results in {@link #aggregatesStore}, and must call {@link DimensionSelector#lookupName(int)} for every row which
* is processed and cannot cache lookups, or use the dictionary id in any way other than to lookup the current row
* value.
*/
private long scanAndAggregateWithCardinalityUnknown(
TopNQuery query,
Cursor cursor,

0 comments on commit dae53ae

Please sign in to comment.