Fix spill reading for large rows; add test
JoshRosen committed Jul 10, 2015
1 parent 2f48777 commit 5135200
Showing 2 changed files with 17 additions and 2 deletions.

@@ -39,8 +39,8 @@ final class UnsafeSorterSpillReader extends UnsafeSorterIterator {
   private long keyPrefix;
   private int numRecordsRemaining;
 
-  private final byte[] arr = new byte[1024 * 1024]; // TODO: tune this (maybe grow dynamically)?
-  private final Object baseObject = arr;
+  private byte[] arr = new byte[1024 * 1024];
+  private Object baseObject = arr;
   private final long baseOffset = PlatformDependent.BYTE_ARRAY_OFFSET;
 
   public UnsafeSorterSpillReader(
@@ -63,6 +63,10 @@ public boolean hasNext() {
   public void loadNext() throws IOException {
     recordLength = din.readInt();
     keyPrefix = din.readLong();
+    if (recordLength > arr.length) {
+      arr = new byte[recordLength];
+      baseObject = arr;
+    }
     ByteStreams.readFully(in, arr, 0, recordLength);
     numRecordsRemaining--;
     if (numRecordsRemaining == 0) {
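
The change replaces the fixed 1 MB read buffer with one that is reallocated whenever an incoming spill record is larger than the current buffer. Below is a minimal standalone sketch of that grow-on-demand pattern, not the actual Spark reader: the class and method names are hypothetical, and it omits the key-prefix field for brevity, assuming only an int length prefix followed by the payload.

import java.io.DataInputStream;
import java.io.IOException;

// Illustrative sketch of the grow-on-demand buffer pattern used by the fix.
// GrowableRecordReader and readRecord are made-up names, not Spark APIs.
class GrowableRecordReader {
  // Start with a 1 MB buffer, matching the spill reader's initial size.
  private byte[] buffer = new byte[1024 * 1024];

  // Reads one length-prefixed record, enlarging the buffer first if the
  // record does not fit; the old, undersized buffer is simply discarded.
  byte[] readRecord(DataInputStream in) throws IOException {
    int recordLength = in.readInt();
    if (recordLength > buffer.length) {
      buffer = new byte[recordLength];
    }
    in.readFully(buffer, 0, recordLength);
    return buffer;
  }
}

In the actual reader, baseObject must be re-pointed at the new array whenever arr is reallocated, since records are accessed through that reference; this is also why both fields lose their final modifier in the diff above.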

@@ -63,6 +63,17 @@ class UnsafeExternalSortSuite extends SparkPlanTest with BeforeAndAfterAll {
     }
   }
 
+  test("sorting does not crash for large inputs") {
+    val sortOrder = 'a.asc :: Nil
+    val stringLength = 1024 * 1024 * 2
+    checkAnswer(
+      Seq(Tuple1("a" * stringLength), Tuple1("b" * stringLength)).toDF("a").repartition(1),
+      UnsafeExternalSort(sortOrder, global = true, _: SparkPlan, testSpillFrequency = 1),
+      Sort(sortOrder, global = true, _: SparkPlan),
+      sortAnswers = false
+    )
+  }
+
   // Test sorting on different data types
   for (
     dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType)
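
The new test is aimed squarely at this code path: each row carries a string of 1024 * 1024 * 2 characters, so every record is larger than the reader's 1 MB initial buffer, and testSpillFrequency = 1 presumably makes the operator spill aggressively so the enlarged-record branch in UnsafeSorterSpillReader is actually exercised during the sort.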
