From 52b6cd2c2038c56014e053751090085aa79c6a74 Mon Sep 17 00:00:00 2001 From: Aaron Davidson Date: Thu, 9 Oct 2014 17:40:04 -0700 Subject: [PATCH 1/2] [SPARK-3889] Attempt to avoid SIGBUS by not mmapping files in ConnectionManager In general, individual shuffle blocks are frequently small, so mmapping them often creates a lot of waste. It may not be bad to mmap the larger ones, but it is pretty inconvenient to get configuration into ManagedBuffer, and besides it is unlikely to help all that much. Note that user of ManagedBuffer#nioByteBuffer() seems generally bad practice, and would ideally never be used for data that may be large. Users of such data would ideally stream the data instead. --- .../main/scala/org/apache/spark/network/ManagedBuffer.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala index a4409181ec907..164242a614464 100644 --- a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala +++ b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala @@ -72,7 +72,10 @@ final class FileSegmentManagedBuffer(val file: File, val offset: Long, val lengt var channel: FileChannel = null try { channel = new RandomAccessFile(file, "r").getChannel - channel.map(MapMode.READ_ONLY, offset, length) + val buf = ByteBuffer.allocate(length.toInt) + channel.read(buf, offset) + buf.flip() + buf } catch { case e: IOException => Try(channel.size).toOption match { From a152065971d6045170137ce1358e16e52b0d9cc9 Mon Sep 17 00:00:00 2001 From: Aaron Davidson Date: Fri, 10 Oct 2014 00:33:09 -0700 Subject: [PATCH 2/2] Add other pathway back --- .../apache/spark/network/ManagedBuffer.scala | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala index 164242a614464..4c9ca97a2a6b7 100644 --- a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala +++ b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala @@ -66,16 +66,27 @@ sealed abstract class ManagedBuffer { final class FileSegmentManagedBuffer(val file: File, val offset: Long, val length: Long) extends ManagedBuffer { + /** + * Memory mapping is expensive and can destabilize the JVM (SPARK-1145, SPARK-3889). + * Avoid unless there's a good reason not to. + */ + private val MIN_MEMORY_MAP_BYTES = 2 * 1024 * 1024; + override def size: Long = length override def nioByteBuffer(): ByteBuffer = { var channel: FileChannel = null try { channel = new RandomAccessFile(file, "r").getChannel - val buf = ByteBuffer.allocate(length.toInt) - channel.read(buf, offset) - buf.flip() - buf + // Just copy the buffer if it's sufficiently small, as memory mapping has a high overhead. + if (length < MIN_MEMORY_MAP_BYTES) { + val buf = ByteBuffer.allocate(length.toInt) + channel.read(buf, offset) + buf.flip() + buf + } else { + channel.map(MapMode.READ_ONLY, offset, length) + } } catch { case e: IOException => Try(channel.size).toOption match {