Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,26 @@

package org.apache.spark.util.collection

import java.lang.ref.WeakReference
import java.util.Arrays
import java.util.concurrent.TimeUnit

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.random.XORShiftRandom

class SorterSuite extends SparkFunSuite {

/**
 * Run GC and wait (for up to 10 seconds) until a freshly created weak reference is cleared,
 * which indicates that GC has actually run, to free memory used by prior tests.
 */
private def runGC(): Unit = {
Copy link
Copy Markdown
Member

@pan3793 pan3793 May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about promoting `protected def runGC(): Unit`, defined in `ContextCleanerSuiteBase`, to `SparkFunSuite` and replacing all direct calls of `System.gc()` with `runGC()` in test code? A quick search shows dozens of `System.gc()` calls in test code.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion. SorterSuite is the only one I'm seeing flaky in CI right now, so I'd prefer to keep this PR focused on fixing it.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, but could you lift and reuse def runGC() instead of replicating it here?

val weakRef = new WeakReference(new Object())
val startTimeNs = System.nanoTime()
System.gc()
while (System.nanoTime() - startTimeNs < TimeUnit.SECONDS.toNanos(10) && weakRef.get != null) {
System.gc()
Thread.sleep(200)
}
}

test("equivalent to Arrays.sort") {
val rand = new XORShiftRandom(123)
val data0 = Array.tabulate[Int](10000) { i => rand.nextInt() }
Expand Down Expand Up @@ -71,7 +84,6 @@ class SorterSuite extends SparkFunSuite {
}

test("java.lang.ArrayIndexOutOfBoundsException in TimSort") {
System.gc()
// scalastyle:off
val runLengths = Array(76405736, 74830360, 1181532, 787688, 1575376, 2363064, 3938440, 6301504,
1181532, 393844, 15753760, 1575376, 787688, 393844, 1969220, 3150752, 1181532,787688, 5513816, 3938440,
Expand Down Expand Up @@ -140,7 +152,16 @@ class SorterSuite extends SparkFunSuite {
21, 20, 22, 18, 452, 114, 95, 18, 17, 21, 36, 18, 17, 115, 76, 144, 44, 38, 61,20, 19, 21, 17)
// scalastyle:on
val arrayToSortSize = 1091482190
val arrayToSort = new Array[Byte](arrayToSortSize)
// Retry once after forcing GC: memory held by the previous test (e.g. the ~256 MB
// int array in "SPARK-5984 TimSort bug") may not be reclaimed before this >1 GB
// allocation, causing flaky OOM in CI.
val arrayToSort = try {
new Array[Byte](arrayToSortSize)
} catch {
case _: OutOfMemoryError =>
runGC()
new Array[Byte](arrayToSortSize)
}
var sum: Int = -1
for (i <- runLengths) {
sum += i
Expand Down