Skip to content

Commit

Permalink
Resolve a bug where datasketches would not downsample sketches suffic…
Browse files Browse the repository at this point in the history
…iently (#16119)

* Fix sketch memory issue

* Rename function

* Add unit test

* Revert downsampling change
  • Loading branch information
adarshsanjeev committed May 14, 2024
1 parent b8dd747 commit 18a4722
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,12 @@ public long estimatedTotalWeight()
return count;
}

@VisibleForTesting
long getTotalRetainedBytes()
{
return totalRetainedBytes;
}

@Override
public boolean hasMultipleValues(final int keyPosition)
{
Expand Down Expand Up @@ -414,7 +420,7 @@ private BucketHolder getOrCreateBucketHolder(final RowKey bucketKey)
void downSample()
{
long newTotalRetainedBytes = totalRetainedBytes;
final long targetTotalRetainedBytes = totalRetainedBytes / 2;
final long targetTotalRetainedBytes = Math.min(totalRetainedBytes / 2, maxRetainedBytes);

final List<Pair<Long, BucketHolder>> sortedHolders = new ArrayList<>(buckets.size());
final RowKeyReader trimmedRowReader = keyReader.trimmedKeyReader(clusterBy.getBucketByCount());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,26 @@ public void test_clusterByXYbucketByX_maxX_lowCardinalityY_withAggregation()
);
}

@Test
public void testShouldDownsampleSingleBucket()
{
ClusterByStatisticsCollectorImpl clusterByStatisticsCollector =
(ClusterByStatisticsCollectorImpl) ClusterByStatisticsCollectorImpl.create(
CLUSTER_BY_XYZ_BUCKET_BY_X,
SIGNATURE,
35000,
500,
false,
false
);

clusterByStatisticsCollector.add(createKey(CLUSTER_BY_XYZ_BUCKET_BY_X, 2, 1, "value1"), 1);
clusterByStatisticsCollector.add(createKey(CLUSTER_BY_XYZ_BUCKET_BY_X, 2, 3, "value2"), 1);
clusterByStatisticsCollector.add(createKey(CLUSTER_BY_XYZ_BUCKET_BY_X, 1, 1, "Extremely long key string for unit test; Extremely long key string for unit test;"), 500);

Assert.assertTrue(clusterByStatisticsCollector.getTotalRetainedBytes() <= 35000);
}

@Test
public void testBucketDownsampledToSingleKeyFinishesCorrectly()
{
Expand Down

0 comments on commit 18a4722

Please sign in to comment.