Skip to content

Commit

Permalink
DRILL-6030: Managed sort should minimize number of batches in a k-way merge
Browse files Browse the repository at this point in the history

This closes #1075
  • Loading branch information
vrozov authored and parthchandra committed Jan 12, 2018
1 parent fa2005e commit dcaac1b
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class SortConfig {
public static final int MIN_SPILL_BATCH_SIZE = 256 * 1024;
public static final int MIN_MERGE_BATCH_SIZE = 256 * 1024;

public static final int DEFAULT_MERGE_LIMIT = 128;
public static final int MIN_MERGE_LIMIT = 2;

private final long maxMemory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -687,11 +687,11 @@ public MergeTask consolidateBatches(long allocMemory, int inMemCount, int spille
spillBatchSize.maxBufferSize);
memMergeLimit = Math.max(0, memMergeLimit);

// If batches are in memory, and we need more memory to merge
// them all than is actually available, then spill some in-memory
// batches.
// If batches are in memory, and final merge count will exceed
// merge limit or we need more memory to merge them all than is
// actually available, then spill some in-memory batches.

if (inMemCount > 0 && memMergeLimit < spilledRunsCount) {
if (inMemCount > 0 && ((inMemCount + spilledRunsCount) > config.mergeLimit() || memMergeLimit < spilledRunsCount)) {
return new MergeTask(MergeAction.SPILL, 0);
}

Expand Down
4 changes: 2 additions & 2 deletions exec/java-exec/src/main/resources/drill-module.conf
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,10 @@ drill.exec: {
// value provided by Foreman. Primarily for testing.
// 0 = unlimited, Supports HOCON memory suffixes.
mem_limit: 0,
// Limit on the number of spilled batches that can be merged in
// Limit on the number of batches that can be merged in
// a single pass. Limits the number of open file handles.
// 0 = unlimited
merge_limit: 0,
merge_limit: 128,
spill: {
// Deprecated for managed xsort; used only by legacy xsort
group.size: 40000,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public void testConfigDefaults() {
// Zero means no artificial limit
assertEquals(0, sortConfig.maxMemory());
// Defaults to SortConfig.DEFAULT_MERGE_LIMIT (a configured zero would map to a large number)
assertEquals(Integer.MAX_VALUE, sortConfig.mergeLimit());
assertEquals(SortConfig.DEFAULT_MERGE_LIMIT, sortConfig.mergeLimit());
// Default size: 256 MiB
assertEquals(256 * ONE_MEG, sortConfig.spillFileSize());
// Default size: 1 MiB
Expand Down Expand Up @@ -622,14 +622,12 @@ public void testMergeLimit() {
int spillRunCount = mergeLimitConstraint;
long allocMemory = batchSize * memBatchCount;
MergeTask task = memManager.consolidateBatches(allocMemory, memBatchCount, spillRunCount);
assertEquals(MergeAction.NONE, task.action);
assertEquals(MergeAction.SPILL, task.action);

// One more run than can merge in one go. But, we have plenty of
// memory to merge and hold the in-memory batches. So, just merge.
// In-memory batches plus spilled runs exceed the merge limit, so spill.

task = memManager.consolidateBatches(allocMemory, memBatchCount, spillRunCount + 1);
assertEquals(MergeAction.MERGE, task.action);
assertEquals(2, task.count);
task = memManager.consolidateBatches(allocMemory, 1, spillRunCount);
assertEquals(MergeAction.SPILL, task.action);

// One more run than can merge in one go, intermediate merge

Expand Down

0 comments on commit dcaac1b

Please sign in to comment.