
[SPARK-3453] Netty-based BlockTransferService #2330

Closed · wants to merge 34 commits into from

Commits
5bb88f5  [SPARK-3453] Refactor Netty module to use BlockTransferService. (rxin, Sep 9, 2014)
9b3b397  Use Epoll.isAvailable in BlockServer as well. (rxin, Sep 9, 2014)
dd783ff  Added more documentation. (rxin, Sep 9, 2014)
b5b380e  Reference count buffers and clean them up properly. (rxin, Sep 10, 2014)
1474824  Fixed ShuffleBlockFetcherIteratorSuite. (rxin, Sep 10, 2014)
b404da3  Forgot to add TestSerializer to the commit list. (rxin, Sep 10, 2014)
fbf882d  Shorten NioManagedBuffer and NettyManagedBuffer class names. (rxin, Sep 10, 2014)
b32c3fe  Added more test cases covering cleanup when fault happens in ShuffleB… (rxin, Sep 11, 2014)
d135fa3  Fixed style violation. (rxin, Sep 11, 2014)
1e0d277  Fixed BlockClientHandlerSuite (rxin, Sep 11, 2014)
6e84cb2  Merge branch 'master' into netty-blockTransferService (rxin, Sep 11, 2014)
55266d1  Incorporated feedback from Norman: (rxin, Sep 12, 2014)
f83611e  Added connection pooling. (rxin, Sep 12, 2014)
6ddaa5d  Removed BlockManager.getLocalShuffleFromDisk. (rxin, Sep 12, 2014)
8295561  Fixed test hanging. (rxin, Sep 12, 2014)
d7d0aac  Mark private package visibility and MimaExcludes. (rxin, Sep 12, 2014)
29fe0cc  Implement java.io.Closeable interface. (rxin, Sep 13, 2014)
e92dad7  Merge branch 'master' into netty-blockTransferService (rxin, Sep 17, 2014)
a79a259  Added logging. (rxin, Sep 17, 2014)
088ed8a  Fixed error message. (rxin, Sep 17, 2014)
323dfec  Add more debug message. (rxin, Sep 29, 2014)
5814292  Logging close() in case close() fails. (rxin, Sep 29, 2014)
f23e682  Merge branch 'master' into netty-blockTransferService (rxin, Sep 29, 2014)
ba8c441  Fixed tests. (rxin, Sep 29, 2014)
ca88068  Merge branch 'buffer-debug' into netty-blockTransferService (rxin, Sep 29, 2014)
dfc2c34  Removed OIO and added num threads settings. (rxin, Sep 29, 2014)
3fbfd3f  Merge branch 'master' into netty-blockTransferService (rxin, Sep 29, 2014)
69f5d0a  Copy the buffer in fetchBlockSync. (rxin, Sep 29, 2014)
bc9ed22  Implemented block uploads. (rxin, Sep 30, 2014)
a3a09f6  Fix style violation. (rxin, Sep 30, 2014)
0140d6e  Merge branch 'master' into netty-blockTransferService (rxin, Sep 30, 2014)
0dae310  Merge with latest master. (rxin, Sep 30, 2014)
ad09236  Flip buffer. (rxin, Sep 30, 2014)
bdab2c7  Fixed spark.shuffle.io.receiveBuffer setting. (rxin, Sep 30, 2014)
9 changes: 8 additions & 1 deletion core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -32,13 +32,15 @@ import org.apache.spark.api.python.PythonWorkerFactory
import org.apache.spark.broadcast.BroadcastManager
import org.apache.spark.metrics.MetricsSystem
import org.apache.spark.network.BlockTransferService
import org.apache.spark.network.netty.NettyBlockTransferService
import org.apache.spark.network.nio.NioBlockTransferService
import org.apache.spark.scheduler.LiveListenerBus
import org.apache.spark.serializer.Serializer
import org.apache.spark.shuffle.{ShuffleMemoryManager, ShuffleManager}
import org.apache.spark.storage._
import org.apache.spark.util.{AkkaUtils, Utils}


/**
* :: DeveloperApi ::
* Holds all the runtime environment objects for a running Spark instance (either master or worker),
@@ -234,7 +236,12 @@ object SparkEnv extends Logging {

val shuffleMemoryManager = new ShuffleMemoryManager(conf)

val blockTransferService = new NioBlockTransferService(conf, securityManager)
// TODO(rxin): Config option based on class name, similar to shuffle mgr and compression codec.
val blockTransferService = if (conf.getBoolean("spark.shuffle.use.netty", false)) {
Contributor:

It'd be better if we could go ahead and make the config stable; it's always painful for people to update later. Even if it's just something like:

val blockTransferService = conf.get("spark.shuffle.transferService", "NIO") match {
  case "NETTY" => new NettyBlockTransferService(conf)
  case "NIO" => new NioBlockTransferService(conf, securityManager)
  case s => throw new UnsupportedOperationException("Unknown transfer service: " + s)
}

Potentially less pain for when we address the TODO at a later date.

new NettyBlockTransferService(conf)
} else {
new NioBlockTransferService(conf, securityManager)
}

val blockManagerMaster = new BlockManagerMaster(registerOrLookup(
"BlockManagerMaster",
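For context, enabling the new code path from the diff above is a one-line configuration change (the app name below is illustrative, not from the patch):

import org.apache.spark.SparkConf

// Opt into the Netty-based transfer service; the default remains NIO.
val conf = new SparkConf()
  .setAppName("netty-shuffle-demo")
  .set("spark.shuffle.use.netty", "true")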
core/src/main/scala/org/apache/spark/network/BlockDataManager.scala
@@ -20,14 +20,14 @@ package org.apache.spark.network
import org.apache.spark.storage.StorageLevel


private[spark]
trait BlockDataManager {
Contributor:

Did you add this class at some point? Either way, would you mind adding a class comment? It's not clear how it differs from all the other block-related managers.


/**
* Interface to get local block data.
*
* @return Some(buffer) if the block exists locally, and None if it doesn't.
* Interface to get local block data. Throws an exception if the block cannot be found or
* cannot be read successfully.
*/
def getBlockData(blockId: String): Option[ManagedBuffer]
def getBlockData(blockId: String): ManagedBuffer
Contributor:

Not sure how I feel about this taking a blockId as a string, especially if it's implemented by the BlockManager itself. What's the reasoning behind not taking a BlockId? Just too many users of the API that only have a String?

Contributor (author):

See the PR description; the TODOs will be addressed in separate PRs.


/**
* Put the block locally, using the given storage level.
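To make the contract change concrete, a sketch from a caller's perspective (blockDataManager and blockId are hypothetical names, not patch code):

import org.apache.spark.network.{BlockFetchFailureException, ManagedBuffer}

// Before this patch, a missing block surfaced as None and every caller
// invented its own error handling:
//   blockDataManager.getBlockData(blockId).getOrElse(
//     throw new IllegalStateException(s"Block $blockId not found"))

// After, the implementation throws, so a server-side caller can translate
// any failure into a fetch failure for the remote client:
val buffer: ManagedBuffer =
  try {
    blockDataManager.getBlockData(blockId)
  } catch {
    case e: Exception =>
      throw new BlockFetchFailureException(blockId, s"Block $blockId unreadable", e)
  }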
core/src/main/scala/org/apache/spark/network/BlockFetchingListener.scala
@@ -23,6 +23,7 @@ import java.util.EventListener
/**
* Listener callback interface for [[BlockTransferService.fetchBlocks]].
*/
private[spark]
trait BlockFetchingListener extends EventListener {

/**
@@ -31,7 +32,7 @@ trait BlockFetchingListener extends EventListener {
def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit

/**
* Called upon failures. For each failure, this is called only once (i.e. not once per block).
* Called at least once per block upon failures.
*/
def onBlockFetchFailure(exception: Throwable): Unit
def onBlockFetchFailure(blockId: String, exception: Throwable): Unit
}
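For illustration, a minimal listener against the new signature (the hand-off and println are assumptions, not part of the patch):

import org.apache.spark.network.{BlockFetchingListener, ManagedBuffer}

val listener = new BlockFetchingListener {
  override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = {
    // Retain the buffer if it will be consumed on another thread; the
    // consumer then owns the matching release().
    data.retain()
    // hand (blockId, data) off to the consumer here
  }

  override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = {
    // The per-block signature makes targeted retries possible.
    println(s"Failed to fetch $blockId: $exception")
  }
}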
core/src/main/scala/org/apache/spark/network/BlockTransferService.scala
@@ -17,13 +17,17 @@

package org.apache.spark.network

import java.io.Closeable
import java.nio.ByteBuffer

import scala.concurrent.{Await, Future}
import scala.concurrent.duration.Duration

import org.apache.spark.storage.StorageLevel


abstract class BlockTransferService {
private[spark]
abstract class BlockTransferService extends Closeable {

/**
* Initialize the transfer service by giving it the BlockDataManager that can be used to fetch
@@ -34,7 +38,7 @@ abstract class BlockTransferService {
/**
* Tear down the transfer service.
*/
def stop(): Unit
def close(): Unit

/**
* Port number the service is listening on, available only after [[init]] is invoked.
@@ -50,9 +54,6 @@
* Fetch a sequence of blocks from a remote node asynchronously,
* available only after [[init]] is invoked.
*
* Note that [[BlockFetchingListener.onBlockFetchSuccess]] is called once per block,
* while [[BlockFetchingListener.onBlockFetchFailure]] is called once per failure (not per block).
*
* Note that this API takes a sequence so the implementation can batch requests, and does not
* return a future so the underlying implementation can invoke onBlockFetchSuccess as soon as
* the data of a block is fetched, rather than waiting for all blocks to be fetched.
@@ -83,15 +84,18 @@ abstract class BlockTransferService {
val lock = new Object
@volatile var result: Either[ManagedBuffer, Throwable] = null
Contributor:

Out of curiosity, is there an advantage of this over Try[ManagedBuffer]? Note that this would simplify the final statement from a match to result.get (which re-throws exception if there was one)
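For reference, a minimal sketch of the reviewer's Try-based alternative (not what the patch adopts):

import scala.util.{Failure, Success, Try}

@volatile var result: Try[ManagedBuffer] = null

// onBlockFetchFailure:  result = Failure(exception)
// onBlockFetchSuccess:  result = Success(copiedBuffer)
// after the wait, a single call replaces the Either match:
//   result.get  // returns the buffer, or re-throws the stored exception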

fetchBlocks(hostName, port, Seq(blockId), new BlockFetchingListener {
override def onBlockFetchFailure(exception: Throwable): Unit = {
override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = {
lock.synchronized {
result = Right(exception)
lock.notify()
}
}
override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = {
lock.synchronized {
result = Left(data)
val ret = ByteBuffer.allocate(data.size.toInt)
ret.put(data.nioByteBuffer())
ret.flip()
result = Left(new NioManagedBuffer(ret))
lock.notify()
}
}
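Note the copy in onBlockFetchSuccess above: the fetched bytes are drained into a freshly allocated ByteBuffer, which is then flipped so its position and limit are set for reading before being wrapped in a NioManagedBuffer. This detaches the synchronous result from the transport's possibly reference-counted buffer, and corresponds to the "Copy the buffer in fetchBlockSync" and "Flip buffer" commits in the list above.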
72 changes: 63 additions & 9 deletions core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala
@@ -25,7 +25,8 @@ import java.nio.channels.FileChannel.MapMode
import scala.util.Try

import com.google.common.io.ByteStreams
import io.netty.buffer.{ByteBufInputStream, ByteBuf}
import io.netty.buffer.{Unpooled, ByteBufInputStream, ByteBuf}
import io.netty.channel.DefaultFileRegion

import org.apache.spark.util.{ByteBufferInputStream, Utils}

@@ -34,11 +35,17 @@ import org.apache.spark.util.{ByteBufferInputStream, Utils}
* This interface provides an immutable view for data in the form of bytes. The implementation
* should specify how the data is provided:
*
* - FileSegmentManagedBuffer: data backed by part of a file
* - NioByteBufferManagedBuffer: data backed by a NIO ByteBuffer
* - NettyByteBufManagedBuffer: data backed by a Netty ByteBuf
* - [[FileSegmentManagedBuffer]]: data backed by part of a file
* - [[NioManagedBuffer]]: data backed by a NIO ByteBuffer
* - [[NettyManagedBuffer]]: data backed by a Netty ByteBuf
*
* The concrete buffer implementation might be managed outside the JVM garbage collector.
* For example, in the case of [[NettyManagedBuffer]], the buffers are reference counted.
* In that case, if the buffer is going to be passed around to a different thread, retain/release
Contributor:

The comment here suggests that you should only have to retain if your buffer is a NettyManagedBuffer, which isn't quite true, as you could have an NioManagedBuffer whose ByteBuffer underlies a Netty ByteBuf, in which case it is never safe to pass around.

I'm just a little worried about sanitary buffer usage, as misuse of this API from not copying could lead to nondeterministic data corruption.

* should be called.
*/
sealed abstract class ManagedBuffer {
private[spark]
abstract class ManagedBuffer {
// Note that all the methods are defined with parentheses because their implementations can
// have side effects (io operations).

@@ -57,12 +64,29 @@
* it does not go over the limit.
*/
def inputStream(): InputStream

/**
* Increment the reference count by one if applicable.
*/
def retain(): this.type

/**
* If applicable, decrement the reference count by one and deallocates the buffer if the
* reference count reaches zero.
*/
def release(): this.type

/**
* Convert the buffer into a Netty object, used to write the data out.
*/
private[network] def convertToNetty(): AnyRef
}
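A usage sketch of the retain/release discipline described above (handOff and process are illustrative helpers, not part of the patch):

import java.util.concurrent.Executor
import org.apache.spark.network.ManagedBuffer

def process(bytes: java.nio.ByteBuffer): Unit = ()  // stand-in consumer

def handOff(buf: ManagedBuffer, executor: Executor): Unit = {
  buf.retain()  // pin the buffer before it crosses a thread boundary
  executor.execute(new Runnable {
    override def run(): Unit = {
      try {
        process(buf.nioByteBuffer())
      } finally {
        buf.release()  // always balance the retain, even on failure
      }
    }
  })
}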


/**
* A [[ManagedBuffer]] backed by a segment in a file
*/
private[spark]
final class FileSegmentManagedBuffer(val file: File, val offset: Long, val length: Long)
extends ManagedBuffer {

@@ -113,34 +137,64 @@ final class FileSegmentManagedBuffer(val file: File, val offset: Long, val lengt
}
}

private[network] override def convertToNetty(): AnyRef = {
val fileChannel = new FileInputStream(file).getChannel
new DefaultFileRegion(fileChannel, offset, length)
}

// Content of file segments is not in-memory, so there is no need to reference count.
override def retain(): this.type = this
override def release(): this.type = this

override def toString: String = s"${getClass.getName}($file, $offset, $length)"
}


/**
* A [[ManagedBuffer]] backed by [[java.nio.ByteBuffer]].
*/
final class NioByteBufferManagedBuffer(buf: ByteBuffer) extends ManagedBuffer {
private[spark]
final class NioManagedBuffer(buf: ByteBuffer) extends ManagedBuffer {

override def size: Long = buf.remaining()

override def nioByteBuffer() = buf.duplicate()

override def inputStream() = new ByteBufferInputStream(buf)

private[network] override def convertToNetty(): AnyRef = Unpooled.wrappedBuffer(buf)

// [[ByteBuffer]] is managed by the JVM garbage collector itself.
override def retain(): this.type = this
override def release(): this.type = this

override def toString: String = s"${getClass.getName}($buf)"
}
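To see the reviewer's warning above in code: wrapping a Netty-backed ByteBuffer in a NioManagedBuffer silently defeats reference counting (a deliberately unsafe sketch, not patch code):

import io.netty.buffer.PooledByteBufAllocator

val nettyBuf = PooledByteBufAllocator.DEFAULT.buffer(16)
val view = new NioManagedBuffer(nettyBuf.nioBuffer())  // only a view of nettyBuf
view.retain()       // no-op: NioManagedBuffer assumes GC-managed memory
nettyBuf.release()  // the pooled memory may now be recycled under the view
// Reading view past this point risks nondeterministic corruption;
// copying the bytes out is the only safe way to detach from the ByteBuf.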


/**
* A [[ManagedBuffer]] backed by a Netty [[ByteBuf]].
*/
final class NettyByteBufManagedBuffer(buf: ByteBuf) extends ManagedBuffer {
private[spark]
final class NettyManagedBuffer(buf: ByteBuf) extends ManagedBuffer {

override def size: Long = buf.readableBytes()

override def nioByteBuffer() = buf.nioBuffer()

override def inputStream() = new ByteBufInputStream(buf)

// TODO(rxin): Promote this to top level ManagedBuffer interface and add documentation for it.
def release(): Unit = buf.release()
private[network] override def convertToNetty(): AnyRef = buf

override def retain(): this.type = {
buf.retain()
this
}

override def release(): this.type = {
buf.release()
this
}

override def toString: String = s"${getClass.getName}($buf)"
}
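The AnyRef return type of convertToNetty reflects that Netty's pipeline accepts both ByteBuf and FileRegion messages; a hypothetical write path inside the org.apache.spark.network package (where the private[network] method is visible) might look like:

import io.netty.channel.ChannelHandlerContext

// NIO- and Netty-backed buffers become ByteBufs; file segments become
// DefaultFileRegions, which Netty can send with zero-copy transferTo.
def writeBlock(ctx: ChannelHandlerContext, buf: ManagedBuffer): Unit = {
  ctx.writeAndFlush(buf.convertToNetty())
}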
@@ -15,15 +15,17 @@
* limitations under the License.
*/

package org.apache.spark.network.netty.client
package org.apache.spark.network

import java.util.EventListener
class BlockFetchFailureException(blockId: String, errorMsg: String, cause: Throwable)
extends Exception(errorMsg, cause) {

def this(blockId: String, errorMsg: String) = this(blockId, errorMsg, null)
}

trait BlockClientListener extends EventListener {

def onFetchSuccess(blockId: String, data: ReferenceCountedBuffer): Unit

def onFetchFailure(blockId: String, errorMsg: String): Unit
class BlockUploadFailureException(blockId: String, cause: Throwable)
extends Exception(s"Failed to fetch block $blockId", cause) {

def this(blockId: String) = this(blockId, null)
}