From 52b6cd2c2038c56014e053751090085aa79c6a74 Mon Sep 17 00:00:00 2001
From: Aaron Davidson <aaron@databricks.com>
Date: Thu, 9 Oct 2014 17:40:04 -0700
Subject: [PATCH 1/2] [SPARK-3889] Attempt to avoid SIGBUS by not mmapping
 files in ConnectionManager

In general, individual shuffle blocks are frequently small, so mmapping them
often creates a lot of waste. It may not be bad to mmap the larger ones, but
it is pretty inconvenient to get configuration into ManagedBuffer, and besides
it is unlikely to help all that much.

Note that use of ManagedBuffer#nioByteBuffer() seems generally bad practice,
and would ideally never be used for data that may be large. Users of such data
would ideally stream the data instead.
---
 .../main/scala/org/apache/spark/network/ManagedBuffer.scala  | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala
index a4409181ec907..164242a614464 100644
--- a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala
+++ b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala
@@ -72,7 +72,10 @@ final class FileSegmentManagedBuffer(val file: File, val offset: Long, val lengt
     var channel: FileChannel = null
     try {
       channel = new RandomAccessFile(file, "r").getChannel
-      channel.map(MapMode.READ_ONLY, offset, length)
+      val buf = ByteBuffer.allocate(length.toInt)
+      channel.read(buf, offset)
+      buf.flip()
+      buf
     } catch {
       case e: IOException =>
         Try(channel.size).toOption match {

From a152065971d6045170137ce1358e16e52b0d9cc9 Mon Sep 17 00:00:00 2001
From: Aaron Davidson <aaron@databricks.com>
Date: Fri, 10 Oct 2014 00:33:09 -0700
Subject: [PATCH 2/2] Add other pathway back

---
 .../apache/spark/network/ManagedBuffer.scala  | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala
index 164242a614464..4c9ca97a2a6b7 100644
--- a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala
+++ b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala
@@ -66,16 +66,27 @@ sealed abstract class ManagedBuffer {
 final class FileSegmentManagedBuffer(val file: File, val offset: Long, val length: Long)
   extends ManagedBuffer {
 
+  /**
+   * Memory mapping is expensive and can destabilize the JVM (SPARK-1145, SPARK-3889).
+   * Avoid memory mapping unless there is a good reason to use it.
+   */
+  private val MIN_MEMORY_MAP_BYTES = 2 * 1024 * 1024;
+
   override def size: Long = length
 
   override def nioByteBuffer(): ByteBuffer = {
     var channel: FileChannel = null
     try {
       channel = new RandomAccessFile(file, "r").getChannel
-      val buf = ByteBuffer.allocate(length.toInt)
-      channel.read(buf, offset)
-      buf.flip()
-      buf
+      // Just copy the buffer if it's sufficiently small, as memory mapping has a high overhead.
+      if (length < MIN_MEMORY_MAP_BYTES) {
+        val buf = ByteBuffer.allocate(length.toInt)
+        channel.read(buf, offset)
+        buf.flip()
+        buf
+      } else {
+        channel.map(MapMode.READ_ONLY, offset, length)
+      }
     } catch {
       case e: IOException =>
         Try(channel.size).toOption match {