diff --git a/README.md b/README.md
index a1a48f5bd0819..8906e4c1416b1 100644
--- a/README.md
+++ b/README.md
@@ -118,11 +118,7 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
 ## A Note About Thrift JDBC server and CLI for Spark SQL
 
 Spark SQL supports Thrift JDBC server and CLI.
-See sql-programming-guide.md for more information about those features.
-You can use those features by setting `-Phive-thriftserver` when building Spark as follows.
-
-    $ sbt/sbt -Phive-thriftserver assembly
-
+See sql-programming-guide.md for more information about using the JDBC server.
 
 ## Configuration
 
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 703f15925bc44..9fbb037115db3 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -163,11 +163,6 @@
           <artifactId>spark-hive_${scala.binary.version}</artifactId>
           <version>${project.version}</version>
         </dependency>
-      </dependencies>
-    </profile>
-    <profile>
-      <id>hive-thriftserver</id>
-      <dependencies>
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index 13f0bff7ee507..605df0e929faa 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -45,7 +45,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
   /** Create a SparkConf that loads defaults from system properties and the classpath */
   def this() = this(true)
 
-  private val settings = new HashMap[String, String]()
+  private[spark] val settings = new HashMap[String, String]()
 
   if (loadDefaults) {
     // Load any spark.* system properties
@@ -210,6 +210,12 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
     new SparkConf(false).setAll(settings)
   }
 
+  /**
+   * By using this instead of System.getenv(), environment variables can be mocked
+   * in unit tests.
+   */
+  private[spark] def getenv(name: String): String = System.getenv(name)
+
   /** Checks for illegal or deprecated config settings. Throws an exception for the former. Not
     * idempotent - may mutate this conf object to convert deprecated settings to supported ones. */
   private[spark] def validateSettings() {
@@ -227,7 +233,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
     // Validate spark.executor.extraJavaOptions
     settings.get(executorOptsKey).map { javaOpts =>
       if (javaOpts.contains("-Dspark")) {
-        val msg = s"$executorOptsKey is not allowed to set Spark options (was '$javaOpts)'. " +
+        val msg = s"$executorOptsKey is not allowed to set Spark options (was '$javaOpts'). " +
           "Set them directly on a SparkConf or in a properties file when using ./bin/spark-submit."
         throw new Exception(msg)
       }
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 10210a2927dcc..747023812f754 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -62,7 +62,7 @@ private[spark] class PythonRDD(
     val env = SparkEnv.get
     val localdir = env.blockManager.diskBlockManager.localDirs.map(
       f => f.getPath()).mkString(",")
-    envVars += ("SPARK_LOCAL_DIR" -> localdir) // it's also used in monitor thread
+    envVars += ("SPARK_LOCAL_DIRS" -> localdir) // it's also used in monitor thread
     val worker: Socket = env.createPythonWorker(pythonExec, envVars.toMap)
 
     // Start a thread to feed the process input from our parent's iterator
diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
index a8c827030a1ef..6a187b40628a2 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
@@ -32,8 +32,19 @@ import org.apache.spark.annotation.DeveloperApi
  */
 @DeveloperApi
 trait BroadcastFactory {
+
   def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager): Unit
+
+  /**
+   * Creates a new broadcast variable.
+   *
+   * @param value value to broadcast
+   * @param isLocal whether we are in local mode (single JVM process)
+   * @param id unique id representing this broadcast variable
+   */
   def newBroadcast[T: ClassTag](value: T, isLocal: Boolean, id: Long): Broadcast[T]
+
   def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean): Unit
+
   def stop(): Unit
 }
diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
index fe73456ef8fad..6173fd3a69fc7 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
@@ -17,54 +17,117 @@
 
 package org.apache.spark.broadcast
 
-import java.io.{ByteArrayOutputStream, ByteArrayInputStream, InputStream,
-  ObjectInputStream, ObjectOutputStream, OutputStream}
+import java.io._
+import java.nio.ByteBuffer
 
+import scala.collection.JavaConversions.asJavaEnumeration
 import scala.reflect.ClassTag
 import scala.util.Random
 
 import org.apache.spark.{Logging, SparkConf, SparkEnv, SparkException}
 import org.apache.spark.io.CompressionCodec
 import org.apache.spark.storage.{BroadcastBlockId, StorageLevel}
+import org.apache.spark.util.ByteBufferInputStream
 
 /**
- *  A [[org.apache.spark.broadcast.Broadcast]] implementation that uses a BitTorrent-like
- *  protocol to do a distributed transfer of the broadcasted data to the executors.
- *  The mechanism is as follows. The driver divides the serializes the broadcasted data,
- *  divides it into smaller chunks, and stores them in the BlockManager of the driver.
- *  These chunks are reported to the BlockManagerMaster so that all the executors can
- *  learn the location of those chunks. The first time the broadcast variable (sent as
- *  part of task) is deserialized at a executor, all the chunks are fetched using
- *  the BlockManager. When all the chunks are fetched (initially from the driver's
- *  BlockManager), they are combined and deserialized to recreate the broadcasted data.
- *  However, the chunks are also stored in the BlockManager and reported to the
- *  BlockManagerMaster. As more executors fetch the chunks, BlockManagerMaster learns
- *  multiple locations for each chunk. Hence, subsequent fetches of each chunk will be
- *  made to other executors who already have those chunks, resulting in a distributed
- *  fetching. This prevents the driver from being the bottleneck in sending out multiple
- *  copies of the broadcast data (one per executor) as done by the
- *  [[org.apache.spark.broadcast.HttpBroadcast]].
+ * A BitTorrent-like implementation of [[org.apache.spark.broadcast.Broadcast]].
+ *
+ * The mechanism is as follows:
+ *
+ * The driver divides the serialized object into small chunks and
+ * stores those chunks in the BlockManager of the driver.
+ *
+ * On each executor, the executor first attempts to fetch the object from its BlockManager. If
+ * it does not exist, it then uses remote fetches to fetch the small chunks from the driver and/or
+ * other executors if available. Once it gets the chunks, it puts the chunks in its own
+ * BlockManager, ready for other executors to fetch from.
+ *
+ * This prevents the driver from being the bottleneck in sending out multiple copies of the
+ * broadcast data (one per executor) as done by the [[org.apache.spark.broadcast.HttpBroadcast]].
+ *
+ * @param obj object to broadcast
+ * @param isLocal whether Spark is running in local mode (single JVM process).
+ * @param id A unique identifier for the broadcast variable.
  */
 private[spark] class TorrentBroadcast[T: ClassTag](
-    @transient var value_ : T, isLocal: Boolean, id: Long)
+    obj : T,
+    @transient private val isLocal: Boolean,
+    id: Long)
   extends Broadcast[T](id) with Logging with Serializable {
 
-  override protected def getValue() = value_
+  /**
+   * Value of the broadcast object. On driver, this is set directly by the constructor.
+   * On executors, this is reconstructed by [[readObject]], which builds this value by reading
+   * blocks from the driver and/or other executors.
+   */
+  @transient private var _value: T = obj
 
   private val broadcastId = BroadcastBlockId(id)
 
-  TorrentBroadcast.synchronized {
+  /** Total number of blocks this broadcast variable contains. */
+  private val numBlocks: Int = writeBlocks()
+
+  override protected def getValue() = _value
+
+  /**
+   * Divide the object into multiple blocks and put those blocks in the block manager.
+   *
+   * @return number of blocks this broadcast variable is divided into
+   */
+  private def writeBlocks(): Int = {
+    // For local mode, just put the object in the BlockManager so we can find it later.
     SparkEnv.get.blockManager.putSingle(
-      broadcastId, value_, StorageLevel.MEMORY_AND_DISK, tellMaster = false)
+      broadcastId, _value, StorageLevel.MEMORY_AND_DISK, tellMaster = false)
+
+    if (!isLocal) {
+      val blocks = TorrentBroadcast.blockifyObject(_value)
+      blocks.zipWithIndex.foreach { case (block, i) =>
+        SparkEnv.get.blockManager.putBytes(
+          BroadcastBlockId(id, "piece" + i),
+          block,
+          StorageLevel.MEMORY_AND_DISK_SER,
+          tellMaster = true)
+      }
+      blocks.length
+    } else {
+      0
+    }
   }
 
-  @transient private var arrayOfBlocks: Array[TorrentBlock] = null
-  @transient private var totalBlocks = -1
-  @transient private var totalBytes = -1
-  @transient private var hasBlocks = 0
+  /** Fetch torrent blocks from the driver and/or other executors. */
+  private def readBlocks(): Array[ByteBuffer] = {
+    // Fetch chunks of data. Note that all these chunks are stored in the BlockManager and reported
+    // to the driver, so other executors can pull these chunks from this executor as well.
+    val blocks = new Array[ByteBuffer](numBlocks)
+    val bm = SparkEnv.get.blockManager
 
-  if (!isLocal) {
-    sendBroadcast()
+    for (pid <- Random.shuffle(Seq.range(0, numBlocks))) {
+      val pieceId = BroadcastBlockId(id, "piece" + pid)
+
+      // First try getLocalBytes because  there is a chance that previous attempts to fetch the
+      // broadcast blocks have already fetched some of the blocks. In that case, some blocks
+      // would be available locally (on this executor).
+      var blockOpt = bm.getLocalBytes(pieceId)
+      if (!blockOpt.isDefined) {
+        blockOpt = bm.getRemoteBytes(pieceId)
+        blockOpt match {
+          case Some(block) =>
+            // If we found the block from remote executors/driver's BlockManager, put the block
+            // in this executor's BlockManager.
+            SparkEnv.get.blockManager.putBytes(
+              pieceId,
+              block,
+              StorageLevel.MEMORY_AND_DISK_SER,
+              tellMaster = true)
+
+          case None =>
+            throw new SparkException("Failed to get " + pieceId + " of " + broadcastId)
+        }
+      }
+      // If we get here, the option is defined.
+      blocks(pid) = blockOpt.get
+    }
+    blocks
   }
 
   /**
@@ -82,30 +145,6 @@ private[spark] class TorrentBroadcast[T: ClassTag](
     TorrentBroadcast.unpersist(id, removeFromDriver = true, blocking)
   }
 
-  private def sendBroadcast() {
-    val tInfo = TorrentBroadcast.blockifyObject(value_)
-    totalBlocks = tInfo.totalBlocks
-    totalBytes = tInfo.totalBytes
-    hasBlocks = tInfo.totalBlocks
-
-    // Store meta-info
-    val metaId = BroadcastBlockId(id, "meta")
-    val metaInfo = TorrentInfo(null, totalBlocks, totalBytes)
-    TorrentBroadcast.synchronized {
-      SparkEnv.get.blockManager.putSingle(
-        metaId, metaInfo, StorageLevel.MEMORY_AND_DISK, tellMaster = true)
-    }
-
-    // Store individual pieces
-    for (i <- 0 until totalBlocks) {
-      val pieceId = BroadcastBlockId(id, "piece" + i)
-      TorrentBroadcast.synchronized {
-        SparkEnv.get.blockManager.putSingle(
-          pieceId, tInfo.arrayOfBlocks(i), StorageLevel.MEMORY_AND_DISK, tellMaster = true)
-      }
-    }
-  }
-
   /** Used by the JVM when serializing this object. */
   private def writeObject(out: ObjectOutputStream) {
     assertValid()
@@ -116,102 +155,30 @@ private[spark] class TorrentBroadcast[T: ClassTag](
   private def readObject(in: ObjectInputStream) {
     in.defaultReadObject()
     TorrentBroadcast.synchronized {
-      SparkEnv.get.blockManager.getSingle(broadcastId) match {
+      SparkEnv.get.blockManager.getLocal(broadcastId).map(_.data.next()) match {
         case Some(x) =>
-          value_ = x.asInstanceOf[T]
+          _value = x.asInstanceOf[T]
 
         case None =>
-          val start = System.nanoTime
           logInfo("Started reading broadcast variable " + id)
-
-          // Initialize @transient variables that will receive garbage values from the master.
-          resetWorkerVariables()
-
-          if (receiveBroadcast()) {
-            value_ = TorrentBroadcast.unBlockifyObject[T](arrayOfBlocks, totalBytes, totalBlocks)
-
-            /* Store the merged copy in cache so that the next worker doesn't need to rebuild it.
-             * This creates a trade-off between memory usage and latency. Storing copy doubles
-             * the memory footprint; not storing doubles deserialization cost. Also,
-             * this does not need to be reported to BlockManagerMaster since other executors
-             * does not need to access this block (they only need to fetch the chunks,
-             * which are reported).
-             */
-            SparkEnv.get.blockManager.putSingle(
-              broadcastId, value_, StorageLevel.MEMORY_AND_DISK, tellMaster = false)
-
-            // Remove arrayOfBlocks from memory once value_ is on local cache
-            resetWorkerVariables()
-          } else {
-            logError("Reading broadcast variable " + id + " failed")
-          }
-
-          val time = (System.nanoTime - start) / 1e9
+          val start = System.nanoTime()
+          val blocks = readBlocks()
+          val time = (System.nanoTime() - start) / 1e9
           logInfo("Reading broadcast variable " + id + " took " + time + " s")
-      }
-    }
-  }
 
-  private def resetWorkerVariables() {
-    arrayOfBlocks = null
-    totalBytes = -1
-    totalBlocks = -1
-    hasBlocks = 0
-  }
-
-  private def receiveBroadcast(): Boolean = {
-    // Receive meta-info about the size of broadcast data,
-    // the number of chunks it is divided into, etc.
-    val metaId = BroadcastBlockId(id, "meta")
-    var attemptId = 10
-    while (attemptId > 0 && totalBlocks == -1) {
-      TorrentBroadcast.synchronized {
-        SparkEnv.get.blockManager.getSingle(metaId) match {
-          case Some(x) =>
-            val tInfo = x.asInstanceOf[TorrentInfo]
-            totalBlocks = tInfo.totalBlocks
-            totalBytes = tInfo.totalBytes
-            arrayOfBlocks = new Array[TorrentBlock](totalBlocks)
-            hasBlocks = 0
-
-          case None =>
-            Thread.sleep(500)
-        }
+          _value = TorrentBroadcast.unBlockifyObject[T](blocks)
+          // Store the merged copy in BlockManager so other tasks on this executor don't
+          // need to re-fetch it.
+          SparkEnv.get.blockManager.putSingle(
+            broadcastId, _value, StorageLevel.MEMORY_AND_DISK, tellMaster = false)
       }
-      attemptId -= 1
     }
-    if (totalBlocks == -1) {
-      return false
-    }
-
-    /*
-     * Fetch actual chunks of data. Note that all these chunks are stored in
-     * the BlockManager and reported to the master, so that other executors
-     * can find out and pull the chunks from this executor.
-     */
-    val recvOrder = new Random().shuffle(Array.iterate(0, totalBlocks)(_ + 1).toList)
-    for (pid <- recvOrder) {
-      val pieceId = BroadcastBlockId(id, "piece" + pid)
-      TorrentBroadcast.synchronized {
-        SparkEnv.get.blockManager.getSingle(pieceId) match {
-          case Some(x) =>
-            arrayOfBlocks(pid) = x.asInstanceOf[TorrentBlock]
-            hasBlocks += 1
-            SparkEnv.get.blockManager.putSingle(
-              pieceId, arrayOfBlocks(pid), StorageLevel.MEMORY_AND_DISK, tellMaster = true)
-
-          case None =>
-            throw new SparkException("Failed to get " + pieceId + " of " + broadcastId)
-        }
-      }
-    }
-
-    hasBlocks == totalBlocks
   }
-
 }
 
-private[broadcast] object TorrentBroadcast extends Logging {
+
+private object TorrentBroadcast extends Logging {
+  /** Size of each block. Default value is 4MB. */
   private lazy val BLOCK_SIZE = conf.getInt("spark.broadcast.blockSize", 4096) * 1024
   private var initialized = false
   private var conf: SparkConf = null
@@ -233,7 +200,9 @@ private[broadcast] object TorrentBroadcast extends Logging {
     initialized = false
   }
 
-  def blockifyObject[T: ClassTag](obj: T): TorrentInfo = {
+  def blockifyObject[T: ClassTag](obj: T): Array[ByteBuffer] = {
+    // TODO: Create a special ByteArrayOutputStream that splits the output directly into chunks
+    // so we don't need to do the extra memory copy.
     val bos = new ByteArrayOutputStream()
     val out: OutputStream = if (compress) compressionCodec.compressedOutputStream(bos) else bos
     val ser = SparkEnv.get.serializer.newInstance()
@@ -241,44 +210,27 @@ private[broadcast] object TorrentBroadcast extends Logging {
     serOut.writeObject[T](obj).close()
     val byteArray = bos.toByteArray
     val bais = new ByteArrayInputStream(byteArray)
+    val numBlocks = math.ceil(byteArray.length.toDouble / BLOCK_SIZE).toInt
+    val blocks = new Array[ByteBuffer](numBlocks)
 
-    var blockNum = byteArray.length / BLOCK_SIZE
-    if (byteArray.length % BLOCK_SIZE != 0) {
-      blockNum += 1
-    }
-
-    val blocks = new Array[TorrentBlock](blockNum)
     var blockId = 0
-
     for (i <- 0 until (byteArray.length, BLOCK_SIZE)) {
       val thisBlockSize = math.min(BLOCK_SIZE, byteArray.length - i)
       val tempByteArray = new Array[Byte](thisBlockSize)
       bais.read(tempByteArray, 0, thisBlockSize)
 
-      blocks(blockId) = new TorrentBlock(blockId, tempByteArray)
+      blocks(blockId) = ByteBuffer.wrap(tempByteArray)
       blockId += 1
     }
     bais.close()
-
-    val info = TorrentInfo(blocks, blockNum, byteArray.length)
-    info.hasBlocks = blockNum
-    info
+    blocks
   }
 
-  def unBlockifyObject[T: ClassTag](
-      arrayOfBlocks: Array[TorrentBlock],
-      totalBytes: Int,
-      totalBlocks: Int): T = {
-    val retByteArray = new Array[Byte](totalBytes)
-    for (i <- 0 until totalBlocks) {
-      System.arraycopy(arrayOfBlocks(i).byteArray, 0, retByteArray,
-        i * BLOCK_SIZE, arrayOfBlocks(i).byteArray.length)
-    }
+  def unBlockifyObject[T: ClassTag](blocks: Array[ByteBuffer]): T = {
+    val is = new SequenceInputStream(
+      asJavaEnumeration(blocks.iterator.map(block => new ByteBufferInputStream(block))))
+    val in: InputStream = if (compress) compressionCodec.compressedInputStream(is) else is
 
-    val in: InputStream = {
-      val arrIn = new ByteArrayInputStream(retByteArray)
-      if (compress) compressionCodec.compressedInputStream(arrIn) else arrIn
-    }
     val ser = SparkEnv.get.serializer.newInstance()
     val serIn = ser.deserializeStream(in)
     val obj = serIn.readObject[T]()
@@ -291,22 +243,6 @@ private[broadcast] object TorrentBroadcast extends Logging {
    * If removeFromDriver is true, also remove these persisted blocks on the driver.
    */
   def unpersist(id: Long, removeFromDriver: Boolean, blocking: Boolean) = {
-    synchronized {
-      SparkEnv.get.blockManager.master.removeBroadcast(id, removeFromDriver, blocking)
-    }
+    SparkEnv.get.blockManager.master.removeBroadcast(id, removeFromDriver, blocking)
   }
 }
-
-private[broadcast] case class TorrentBlock(
-    blockID: Int,
-    byteArray: Array[Byte])
-  extends Serializable
-
-private[broadcast] case class TorrentInfo(
-    @transient arrayOfBlocks: Array[TorrentBlock],
-    totalBlocks: Int,
-    totalBytes: Int)
-  extends Serializable {
-
-  @transient var hasBlocks = 0
-}
diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index fb3f7bd54bbfa..2f76e532aeb76 100644
--- a/core/src/main/scala/org/apache/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -62,16 +62,6 @@ private[spark] class Executor(
   val conf = new SparkConf(true)
   conf.setAll(properties)
 
-  // If we are in yarn mode, systems can have different disk layouts so we must set it
-  // to what Yarn on this system said was available. This will be used later when SparkEnv
-  // created.
-  if (java.lang.Boolean.valueOf(
-      System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE")))) {
-    conf.set("spark.local.dir", getYarnLocalDirs())
-  } else if (sys.env.contains("SPARK_LOCAL_DIRS")) {
-    conf.set("spark.local.dir", sys.env("SPARK_LOCAL_DIRS"))
-  }
-
   if (!isLocal) {
     // Setup an uncaught exception handler for non-local mode.
     // Make any thread terminations due to uncaught exceptions kill the entire
@@ -134,21 +124,6 @@ private[spark] class Executor(
     threadPool.shutdown()
   }
 
-  /** Get the Yarn approved local directories. */
-  private def getYarnLocalDirs(): String = {
-    // Hadoop 0.23 and 2.x have different Environment variable names for the
-    // local dirs, so lets check both. We assume one of the 2 is set.
-    // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X
-    val localDirs = Option(System.getenv("YARN_LOCAL_DIRS"))
-      .getOrElse(Option(System.getenv("LOCAL_DIRS"))
-      .getOrElse(""))
-
-    if (localDirs.isEmpty) {
-      throw new Exception("Yarn Local dirs can't be empty")
-    }
-    localDirs
-  }
-
   class TaskRunner(
       execBackend: ExecutorBackend, val taskId: Long, taskName: String, serializedTask: ByteBuffer)
     extends Runnable {
diff --git a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
index e77d762bdf221..b3e951ded6e77 100644
--- a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
+++ b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
@@ -467,7 +467,7 @@ private[spark] class ConnectionManager(
 
           val sendingConnectionOpt = connectionsById.get(remoteConnectionManagerId)
           if (!sendingConnectionOpt.isDefined) {
-            logError("Corresponding SendingConnectionManagerId not found")
+            logError(s"Corresponding SendingConnection to ${remoteConnectionManagerId} not found")
             return
           }
 
diff --git a/core/src/main/scala/org/apache/spark/network/netty/FileClient.scala b/core/src/main/scala/org/apache/spark/network/netty/FileClient.scala
deleted file mode 100644
index c6d35f73db545..0000000000000
--- a/core/src/main/scala/org/apache/spark/network/netty/FileClient.scala
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.network.netty
-
-import java.util.concurrent.TimeUnit
-
-import io.netty.bootstrap.Bootstrap
-import io.netty.channel.{Channel, ChannelOption, EventLoopGroup}
-import io.netty.channel.oio.OioEventLoopGroup
-import io.netty.channel.socket.oio.OioSocketChannel
-
-import org.apache.spark.Logging
-
-class FileClient(handler: FileClientHandler, connectTimeout: Int) extends Logging {
-
-  private var channel: Channel = _
-  private var bootstrap: Bootstrap = _
-  private var group: EventLoopGroup = _
-  private val sendTimeout = 60
-
-  def init(): Unit = {
-    group = new OioEventLoopGroup
-    bootstrap = new Bootstrap
-    bootstrap.group(group)
-      .channel(classOf[OioSocketChannel])
-      .option(ChannelOption.SO_KEEPALIVE, java.lang.Boolean.TRUE)
-      .option(ChannelOption.TCP_NODELAY, java.lang.Boolean.TRUE)
-      .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, Integer.valueOf(connectTimeout))
-      .handler(new FileClientChannelInitializer(handler))
-  }
-
-  def connect(host: String, port: Int) {
-    try {
-      channel = bootstrap.connect(host, port).sync().channel()
-    } catch {
-      case e: InterruptedException =>
-        logWarning("FileClient interrupted while trying to connect", e)
-        close()
-    }
-  }
-
-  def waitForClose(): Unit = {
-    try {
-      channel.closeFuture.sync()
-    } catch {
-      case e: InterruptedException =>
-        logWarning("FileClient interrupted", e)
-    }
-  }
-
-  def sendRequest(file: String): Unit = {
-    try {
-      val bSent = channel.writeAndFlush(file + "\r\n").await(sendTimeout, TimeUnit.SECONDS)
-      if (!bSent) {
-        throw new RuntimeException("Failed to send")
-      }
-    } catch {
-      case e: InterruptedException =>
-        logError("Error", e)
-    }
-  }
-
-  def close(): Unit = {
-    if (group != null) {
-      group.shutdownGracefully()
-      group = null
-      bootstrap = null
-    }
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/FileClientHandler.scala b/core/src/main/scala/org/apache/spark/network/netty/FileClientHandler.scala
deleted file mode 100644
index 017302ec7d33d..0000000000000
--- a/core/src/main/scala/org/apache/spark/network/netty/FileClientHandler.scala
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.network.netty
-
-import io.netty.buffer.ByteBuf
-import io.netty.channel.{ChannelHandlerContext, SimpleChannelInboundHandler}
-
-import org.apache.spark.storage.BlockId
-
-
-abstract class FileClientHandler extends SimpleChannelInboundHandler[ByteBuf] {
-
-  private var currentHeader: FileHeader = null
-
-  @volatile
-  private var handlerCalled: Boolean = false
-
-  def isComplete: Boolean = handlerCalled
-
-  def handle(ctx: ChannelHandlerContext, in: ByteBuf, header: FileHeader)
-
-  def handleError(blockId: BlockId)
-
-  override def channelRead0(ctx: ChannelHandlerContext, in: ByteBuf) {
-    if (currentHeader == null && in.readableBytes >= FileHeader.HEADER_SIZE) {
-      currentHeader = FileHeader.create(in.readBytes(FileHeader.HEADER_SIZE))
-    }
-    if (in.readableBytes >= currentHeader.fileLen) {
-      handle(ctx, in, currentHeader)
-      handlerCalled = true
-      currentHeader = null
-      ctx.close()
-    }
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala b/core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala
deleted file mode 100644
index 607e560ff277f..0000000000000
--- a/core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.network.netty
-
-import io.netty.buffer._
-
-import org.apache.spark.Logging
-import org.apache.spark.storage.{BlockId, TestBlockId}
-
-private[spark] class FileHeader (
-  val fileLen: Int,
-  val blockId: BlockId) extends Logging {
-
-  lazy val buffer: ByteBuf = {
-    val buf = Unpooled.buffer()
-    buf.capacity(FileHeader.HEADER_SIZE)
-    buf.writeInt(fileLen)
-    buf.writeInt(blockId.name.length)
-    blockId.name.foreach((x: Char) => buf.writeByte(x))
-    // padding the rest of header
-    if (FileHeader.HEADER_SIZE - buf.readableBytes > 0 ) {
-      buf.writeZero(FileHeader.HEADER_SIZE - buf.readableBytes)
-    } else {
-      throw new Exception("too long header " + buf.readableBytes)
-      logInfo("too long header")
-    }
-    buf
-  }
-
-}
-
-private[spark] object FileHeader {
-
-  val HEADER_SIZE = 40
-
-  def getFileLenOffset = 0
-  def getFileLenSize = Integer.SIZE/8
-
-  def create(buf: ByteBuf): FileHeader = {
-    val length = buf.readInt
-    val idLength = buf.readInt
-    val idBuilder = new StringBuilder(idLength)
-    for (i <- 1 to idLength) {
-      idBuilder += buf.readByte().asInstanceOf[Char]
-    }
-    val blockId = BlockId(idBuilder.toString())
-    new FileHeader(length, blockId)
-  }
-
-  def main(args:Array[String]) {
-    val header = new FileHeader(25, TestBlockId("my_block"))
-    val buf = header.buffer
-    val newHeader = FileHeader.create(buf)
-    System.out.println("id=" + newHeader.blockId + ",size=" + newHeader.fileLen)
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/FileServer.scala b/core/src/main/scala/org/apache/spark/network/netty/FileServer.scala
deleted file mode 100644
index dff77950659af..0000000000000
--- a/core/src/main/scala/org/apache/spark/network/netty/FileServer.scala
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.network.netty
-
-import java.net.InetSocketAddress
-
-import io.netty.bootstrap.ServerBootstrap
-import io.netty.channel.{ChannelFuture, ChannelOption, EventLoopGroup}
-import io.netty.channel.oio.OioEventLoopGroup
-import io.netty.channel.socket.oio.OioServerSocketChannel
-
-import org.apache.spark.Logging
-
-/**
- * Server that accept the path of a file an echo back its content.
- */
-class FileServer(pResolver: PathResolver, private var port: Int) extends Logging {
-
-  private val addr: InetSocketAddress = new InetSocketAddress(port)
-  private var bossGroup: EventLoopGroup = new OioEventLoopGroup
-  private var workerGroup: EventLoopGroup = new OioEventLoopGroup
-
-  private var channelFuture: ChannelFuture = {
-    val bootstrap = new ServerBootstrap
-    bootstrap.group(bossGroup, workerGroup)
-      .channel(classOf[OioServerSocketChannel])
-      .option(ChannelOption.SO_BACKLOG, java.lang.Integer.valueOf(100))
-      .option(ChannelOption.SO_RCVBUF, java.lang.Integer.valueOf(1500))
-      .childHandler(new FileServerChannelInitializer(pResolver))
-    bootstrap.bind(addr)
-  }
-
-  try {
-    val boundAddress = channelFuture.sync.channel.localAddress.asInstanceOf[InetSocketAddress]
-    port = boundAddress.getPort
-  } catch {
-    case ie: InterruptedException =>
-      port = 0
-  }
-
-  /** Start the file server asynchronously in a new thread. */
-  def start(): Unit = {
-    val blockingThread: Thread = new Thread {
-      override def run(): Unit = {
-        try {
-          channelFuture.channel.closeFuture.sync
-          logInfo("FileServer exiting")
-        } catch {
-          case e: InterruptedException =>
-            logError("File server start got interrupted", e)
-        }
-        // NOTE: bootstrap is shutdown in stop()
-      }
-    }
-    blockingThread.setDaemon(true)
-    blockingThread.start()
-  }
-
-  def getPort: Int = port
-
-  def stop(): Unit = {
-    if (channelFuture != null) {
-      channelFuture.channel().close().awaitUninterruptibly()
-      channelFuture = null
-    }
-    if (bossGroup != null) {
-      bossGroup.shutdownGracefully()
-      bossGroup = null
-    }
-    if (workerGroup != null) {
-      workerGroup.shutdownGracefully()
-      workerGroup = null
-    }
-  }
-}
-
diff --git a/core/src/main/scala/org/apache/spark/network/netty/FileServerHandler.scala b/core/src/main/scala/org/apache/spark/network/netty/FileServerHandler.scala
deleted file mode 100644
index 96f60b2883ad9..0000000000000
--- a/core/src/main/scala/org/apache/spark/network/netty/FileServerHandler.scala
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.network.netty
-
-import java.io.FileInputStream
-
-import io.netty.channel.{DefaultFileRegion, ChannelHandlerContext, SimpleChannelInboundHandler}
-
-import org.apache.spark.Logging
-import org.apache.spark.storage.{BlockId, FileSegment}
-
-
-class FileServerHandler(pResolver: PathResolver)
-  extends SimpleChannelInboundHandler[String] with Logging {
-
-  override def channelRead0(ctx: ChannelHandlerContext, blockIdString: String): Unit = {
-    val blockId: BlockId = BlockId(blockIdString)
-    val fileSegment: FileSegment = pResolver.getBlockLocation(blockId)
-    if (fileSegment == null) {
-      return
-    }
-    val file = fileSegment.file
-    if (file.exists) {
-      if (!file.isFile) {
-        ctx.write(new FileHeader(0, blockId).buffer)
-        ctx.flush()
-        return
-      }
-      val length: Long = fileSegment.length
-      if (length > Integer.MAX_VALUE || length <= 0) {
-        ctx.write(new FileHeader(0, blockId).buffer)
-        ctx.flush()
-        return
-      }
-      ctx.write(new FileHeader(length.toInt, blockId).buffer)
-      try {
-        val channel = new FileInputStream(file).getChannel
-        ctx.write(new DefaultFileRegion(channel, fileSegment.offset, fileSegment.length))
-      } catch {
-        case e: Exception =>
-          logError("Exception: ", e)
-      }
-    } else {
-      ctx.write(new FileHeader(0, blockId).buffer)
-    }
-    ctx.flush()
-  }
-
-  override def exceptionCaught(ctx: ChannelHandlerContext, cause: Throwable): Unit = {
-    logError("Exception: ", cause)
-    ctx.close()
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyConfig.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyConfig.scala
new file mode 100644
index 0000000000000..b5870152c5a64
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/NettyConfig.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty
+
+import org.apache.spark.SparkConf
+
+/**
+ * A central location that tracks all the settings we exposed to users.
+ */
+private[spark]
+class NettyConfig(conf: SparkConf) {
+
+  /** Port the server listens on. Default to a random port. */
+  private[netty] val serverPort = conf.getInt("spark.shuffle.io.port", 0)
+
+  /** IO mode: nio, oio, epoll, or auto (try epoll first and then nio). */
+  private[netty] val ioMode = conf.get("spark.shuffle.io.mode", "nio").toLowerCase
+
+  /** Connect timeout in secs. Default 60 secs. */
+  private[netty] val connectTimeoutMs = conf.getInt("spark.shuffle.io.connectionTimeout", 60) * 1000
+
+  /**
+   * Percentage of the desired amount of time spent for I/O in the child event loops.
+   * Only applicable in nio and epoll.
+   */
+  private[netty] val ioRatio = conf.getInt("spark.shuffle.io.netty.ioRatio", 80)
+
+  /** Requested maximum length of the queue of incoming connections. */
+  private[netty] val backLog: Option[Int] = conf.getOption("spark.shuffle.io.backLog").map(_.toInt)
+
+  /**
+   * Receive buffer size (SO_RCVBUF).
+   * Note: the optimal size for receive buffer and send buffer should be
+   *  latency * network_bandwidth.
+   * Assuming latency = 1ms, network_bandwidth = 10Gbps
+   *  buffer size should be ~ 1.25MB
+   */
+  private[netty] val receiveBuf: Option[Int] =
+    conf.getOption("spark.shuffle.io.sendBuffer").map(_.toInt)
+
+  /** Send buffer size (SO_SNDBUF). */
+  private[netty] val sendBuf: Option[Int] =
+    conf.getOption("spark.shuffle.io.sendBuffer").map(_.toInt)
+}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala
deleted file mode 100644
index e7b2855e1ec91..0000000000000
--- a/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.network.netty
-
-import java.util.concurrent.Executors
-
-import scala.collection.JavaConverters._
-
-import io.netty.buffer.ByteBuf
-import io.netty.channel.ChannelHandlerContext
-import io.netty.util.CharsetUtil
-
-import org.apache.spark.{Logging, SparkConf}
-import org.apache.spark.network.ConnectionManagerId
-import org.apache.spark.storage.BlockId
-
-private[spark] class ShuffleCopier(conf: SparkConf) extends Logging {
-
-  def getBlock(host: String, port: Int, blockId: BlockId,
-      resultCollectCallback: (BlockId, Long, ByteBuf) => Unit) {
-
-    val handler = new ShuffleCopier.ShuffleClientHandler(resultCollectCallback)
-    val connectTimeout = conf.getInt("spark.shuffle.netty.connect.timeout", 60000)
-    val fc = new FileClient(handler, connectTimeout)
-
-    try {
-      fc.init()
-      fc.connect(host, port)
-      fc.sendRequest(blockId.name)
-      fc.waitForClose()
-      fc.close()
-    } catch {
-      // Handle any socket-related exceptions in FileClient
-      case e: Exception => {
-        logError("Shuffle copy of block " + blockId + " from " + host + ":" + port + " failed", e)
-        handler.handleError(blockId)
-      }
-    }
-  }
-
-  def getBlock(cmId: ConnectionManagerId, blockId: BlockId,
-      resultCollectCallback: (BlockId, Long, ByteBuf) => Unit) {
-    getBlock(cmId.host, cmId.port, blockId, resultCollectCallback)
-  }
-
-  def getBlocks(cmId: ConnectionManagerId,
-    blocks: Seq[(BlockId, Long)],
-    resultCollectCallback: (BlockId, Long, ByteBuf) => Unit) {
-
-    for ((blockId, size) <- blocks) {
-      getBlock(cmId, blockId, resultCollectCallback)
-    }
-  }
-}
-
-
-private[spark] object ShuffleCopier extends Logging {
-
-  private class ShuffleClientHandler(resultCollectCallBack: (BlockId, Long, ByteBuf) => Unit)
-    extends FileClientHandler with Logging {
-
-    override def handle(ctx: ChannelHandlerContext, in: ByteBuf, header: FileHeader) {
-      logDebug("Received Block: " + header.blockId + " (" + header.fileLen + "B)")
-      resultCollectCallBack(header.blockId, header.fileLen.toLong, in.readBytes(header.fileLen))
-    }
-
-    override def handleError(blockId: BlockId) {
-      if (!isComplete) {
-        resultCollectCallBack(blockId, -1, null)
-      }
-    }
-  }
-
-  def echoResultCollectCallBack(blockId: BlockId, size: Long, content: ByteBuf) {
-    if (size != -1) {
-      logInfo("File: " + blockId + " content is : \" " + content.toString(CharsetUtil.UTF_8) + "\"")
-    }
-  }
-
-  def main(args: Array[String]) {
-    if (args.length < 3) {
-      System.err.println("Usage: ShuffleCopier <host> <port> <shuffle_block_id> <threads>")
-      System.exit(1)
-    }
-    val host = args(0)
-    val port = args(1).toInt
-    val blockId = BlockId(args(2))
-    val threads = if (args.length > 3) args(3).toInt else 10
-
-    val copiers = Executors.newFixedThreadPool(80)
-    val tasks = (for (i <- Range(0, threads)) yield {
-      Executors.callable(new Runnable() {
-        def run() {
-          val copier = new ShuffleCopier(new SparkConf)
-          copier.getBlock(host, port, blockId, echoResultCollectCallBack)
-        }
-      })
-    }).asJava
-    copiers.invokeAll(tasks)
-    copiers.shutdown()
-    System.exit(0)
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala
deleted file mode 100644
index 95958e30f7eeb..0000000000000
--- a/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.network.netty
-
-import java.io.File
-
-import org.apache.spark.Logging
-import org.apache.spark.util.Utils
-import org.apache.spark.storage.{BlockId, FileSegment}
-
-private[spark] class ShuffleSender(portIn: Int, val pResolver: PathResolver) extends Logging {
-
-  val server = new FileServer(pResolver, portIn)
-  server.start()
-
-  def stop() {
-    server.stop()
-  }
-
-  def port: Int = server.getPort
-}
-
-
-/**
- * An application for testing the shuffle sender as a standalone program.
- */
-private[spark] object ShuffleSender {
-
-  def main(args: Array[String]) {
-    if (args.length < 3) {
-      System.err.println(
-        "Usage: ShuffleSender <port> <subDirsPerLocalDir> <list of shuffle_block_directories>")
-      System.exit(1)
-    }
-
-    val port = args(0).toInt
-    val subDirsPerLocalDir = args(1).toInt
-    val localDirs = args.drop(2).map(new File(_))
-
-    val pResovler = new PathResolver {
-      override def getBlockLocation(blockId: BlockId): FileSegment = {
-        if (!blockId.isShuffle) {
-          throw new Exception("Block " + blockId + " is not a shuffle block")
-        }
-        // Figure out which local directory it hashes to, and which subdirectory in that
-        val hash = Utils.nonNegativeHash(blockId)
-        val dirId = hash % localDirs.length
-        val subDirId = (hash / localDirs.length) % subDirsPerLocalDir
-        val subDir = new File(localDirs(dirId), "%02x".format(subDirId))
-        val file = new File(subDir, blockId.name)
-        new FileSegment(file, 0, file.length())
-      }
-    }
-    val sender = new ShuffleSender(port, pResovler)
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/FileClientChannelInitializer.scala b/core/src/main/scala/org/apache/spark/network/netty/client/BlockClientListener.scala
similarity index 65%
rename from core/src/main/scala/org/apache/spark/network/netty/FileClientChannelInitializer.scala
rename to core/src/main/scala/org/apache/spark/network/netty/client/BlockClientListener.scala
index f4261c13f70a8..e28219dd7745b 100644
--- a/core/src/main/scala/org/apache/spark/network/netty/FileClientChannelInitializer.scala
+++ b/core/src/main/scala/org/apache/spark/network/netty/client/BlockClientListener.scala
@@ -15,17 +15,15 @@
  * limitations under the License.
  */
 
-package org.apache.spark.network.netty
+package org.apache.spark.network.netty.client
 
-import io.netty.channel.ChannelInitializer
-import io.netty.channel.socket.SocketChannel
-import io.netty.handler.codec.string.StringEncoder
+import java.util.EventListener
 
 
-class FileClientChannelInitializer(handler: FileClientHandler)
-  extends ChannelInitializer[SocketChannel] {
+trait BlockClientListener extends EventListener {
+
+  def onFetchSuccess(blockId: String, data: ReferenceCountedBuffer): Unit
+
+  def onFetchFailure(blockId: String, errorMsg: String): Unit
 
-  def initChannel(channel: SocketChannel) {
-    channel.pipeline.addLast("encoder", new StringEncoder).addLast("handler", handler)
-  }
 }
diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClient.scala b/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClient.scala
new file mode 100644
index 0000000000000..5aea7ba2f3673
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClient.scala
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.client
+
+import java.util.concurrent.TimeoutException
+
+import io.netty.bootstrap.Bootstrap
+import io.netty.buffer.PooledByteBufAllocator
+import io.netty.channel.socket.SocketChannel
+import io.netty.channel.{ChannelFutureListener, ChannelFuture, ChannelInitializer, ChannelOption}
+import io.netty.handler.codec.LengthFieldBasedFrameDecoder
+import io.netty.handler.codec.string.StringEncoder
+import io.netty.util.CharsetUtil
+
+import org.apache.spark.Logging
+
+/**
+ * Client for fetching data blocks from [[org.apache.spark.network.netty.server.BlockServer]].
+ * Use [[BlockFetchingClientFactory]] to instantiate this client.
+ *
+ * The constructor blocks until a connection is successfully established.
+ *
+ * See [[org.apache.spark.network.netty.server.BlockServer]] for client/server protocol.
+ *
+ * Concurrency: thread safe and can be called from multiple threads.
+ */
+@throws[TimeoutException]
+private[spark]
+class BlockFetchingClient(factory: BlockFetchingClientFactory, hostname: String, port: Int)
+  extends Logging {
+
+  private val handler = new BlockFetchingClientHandler
+
+  /** Netty Bootstrap for creating the TCP connection. */
+  private val bootstrap: Bootstrap = {
+    val b = new Bootstrap
+    b.group(factory.workerGroup)
+      .channel(factory.socketChannelClass)
+      // Use pooled buffers to reduce temporary buffer allocation
+      .option(ChannelOption.ALLOCATOR, PooledByteBufAllocator.DEFAULT)
+      // Disable Nagle's Algorithm since we don't want packets to wait
+      .option(ChannelOption.TCP_NODELAY, java.lang.Boolean.TRUE)
+      .option(ChannelOption.SO_KEEPALIVE, java.lang.Boolean.TRUE)
+      .option[Integer](ChannelOption.CONNECT_TIMEOUT_MILLIS, factory.conf.connectTimeoutMs)
+
+    b.handler(new ChannelInitializer[SocketChannel] {
+      override def initChannel(ch: SocketChannel): Unit = {
+        ch.pipeline
+          .addLast("encoder", new StringEncoder(CharsetUtil.UTF_8))
+          // maxFrameLength = 2G, lengthFieldOffset = 0, lengthFieldLength = 4
+          .addLast("framedLengthDecoder", new LengthFieldBasedFrameDecoder(Int.MaxValue, 0, 4))
+          .addLast("handler", handler)
+      }
+    })
+    b
+  }
+
+  /** Netty ChannelFuture for the connection. */
+  private val cf: ChannelFuture = bootstrap.connect(hostname, port)
+  if (!cf.awaitUninterruptibly(factory.conf.connectTimeoutMs)) {
+    throw new TimeoutException(
+      s"Connecting to $hostname:$port timed out (${factory.conf.connectTimeoutMs} ms)")
+  }
+
+  /**
+   * Ask the remote server for a sequence of blocks, and execute the callback.
+   *
+   * Note that this is asynchronous and returns immediately. Upstream caller should throttle the
+   * rate of fetching; otherwise we could run out of memory.
+   *
+   * @param blockIds sequence of block ids to fetch.
+   * @param listener callback to fire on fetch success / failure.
+   */
+  def fetchBlocks(blockIds: Seq[String], listener: BlockClientListener): Unit = {
+    // It's best to limit the number of "write" calls since it needs to traverse the whole pipeline.
+    // It's also best to limit the number of "flush" calls since it requires system calls.
+    // Let's concatenate the string and then call writeAndFlush once.
+    // This is also why this implementation might be more efficient than multiple, separate
+    // fetch block calls.
+    var startTime: Long = 0
+    logTrace {
+      startTime = System.nanoTime
+      s"Sending request $blockIds to $hostname:$port"
+    }
+
+    blockIds.foreach { blockId =>
+      handler.addRequest(blockId, listener)
+    }
+
+    val writeFuture = cf.channel().writeAndFlush(blockIds.mkString("\n") + "\n")
+    writeFuture.addListener(new ChannelFutureListener {
+      override def operationComplete(future: ChannelFuture): Unit = {
+        if (future.isSuccess) {
+          logTrace {
+            val timeTaken = (System.nanoTime - startTime).toDouble / 1000000
+            s"Sending request $blockIds to $hostname:$port took $timeTaken ms"
+          }
+        } else {
+          // Fail all blocks.
+          val errorMsg =
+            s"Failed to send request $blockIds to $hostname:$port: ${future.cause.getMessage}"
+          logError(errorMsg, future.cause)
+          blockIds.foreach { blockId =>
+            listener.onFetchFailure(blockId, errorMsg)
+            handler.removeRequest(blockId)
+          }
+        }
+      }
+    })
+  }
+
+  def waitForClose(): Unit = {
+    cf.channel().closeFuture().sync()
+  }
+
+  def close(): Unit = cf.channel().close()
+}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientFactory.scala b/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientFactory.scala
new file mode 100644
index 0000000000000..2b28402c52b49
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientFactory.scala
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.client
+
+import io.netty.channel.epoll.{EpollEventLoopGroup, EpollSocketChannel}
+import io.netty.channel.nio.NioEventLoopGroup
+import io.netty.channel.oio.OioEventLoopGroup
+import io.netty.channel.socket.nio.NioSocketChannel
+import io.netty.channel.socket.oio.OioSocketChannel
+import io.netty.channel.{EventLoopGroup, Channel}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.network.netty.NettyConfig
+import org.apache.spark.util.Utils
+
+/**
+ * Factory for creating [[BlockFetchingClient]] by using createClient. This factory reuses
+ * the worker thread pool for Netty.
+ *
+ * Concurrency: createClient is safe to be called from multiple threads concurrently.
+ */
+private[spark]
+class BlockFetchingClientFactory(val conf: NettyConfig) {
+
+  def this(sparkConf: SparkConf) = this(new NettyConfig(sparkConf))
+
+  /** A thread factory so the threads are named (for debugging). */
+  val threadFactory = Utils.namedThreadFactory("spark-shuffle-client")
+
+  /** The following two are instantiated by the [[init]] method, depending ioMode. */
+  var socketChannelClass: Class[_ <: Channel] = _
+  var workerGroup: EventLoopGroup = _
+
+  init()
+
+  /** Initialize [[socketChannelClass]] and [[workerGroup]] based on ioMode. */
+  private def init(): Unit = {
+    def initOio(): Unit = {
+      socketChannelClass = classOf[OioSocketChannel]
+      workerGroup = new OioEventLoopGroup(0, threadFactory)
+    }
+    def initNio(): Unit = {
+      socketChannelClass = classOf[NioSocketChannel]
+      workerGroup = new NioEventLoopGroup(0, threadFactory)
+    }
+    def initEpoll(): Unit = {
+      socketChannelClass = classOf[EpollSocketChannel]
+      workerGroup = new EpollEventLoopGroup(0, threadFactory)
+    }
+
+    conf.ioMode match {
+      case "nio" => initNio()
+      case "oio" => initOio()
+      case "epoll" => initEpoll()
+      case "auto" =>
+        // For auto mode, first try epoll (only available on Linux), then nio.
+        try {
+          initEpoll()
+        } catch {
+          // TODO: Should we log the throwable? But that always happen on non-Linux systems.
+          // Perhaps the right thing to do is to check whether the system is Linux, and then only
+          // call initEpoll on Linux.
+          case e: Throwable => initNio()
+        }
+    }
+  }
+
+  /**
+   * Create a new BlockFetchingClient connecting to the given remote host / port.
+   *
+   * This blocks until a connection is successfully established.
+   *
+   * Concurrency: This method is safe to call from multiple threads.
+   */
+  def createClient(remoteHost: String, remotePort: Int): BlockFetchingClient = {
+    new BlockFetchingClient(this, remoteHost, remotePort)
+  }
+
+  def stop(): Unit = {
+    if (workerGroup != null) {
+      workerGroup.shutdownGracefully()
+    }
+  }
+}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientHandler.scala b/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientHandler.scala
new file mode 100644
index 0000000000000..83265b164299d
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientHandler.scala
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.client
+
+import io.netty.buffer.ByteBuf
+import io.netty.channel.{ChannelHandlerContext, SimpleChannelInboundHandler}
+
+import org.apache.spark.Logging
+
+
+/**
+ * Handler that processes server responses. It uses the protocol documented in
+ * [[org.apache.spark.network.netty.server.BlockServer]].
+ *
+ * Concurrency: thread safe and can be called from multiple threads.
+ */
+private[client]
+class BlockFetchingClientHandler extends SimpleChannelInboundHandler[ByteBuf] with Logging {
+
+  /** Tracks the list of outstanding requests and their listeners on success/failure. */
+  private val outstandingRequests = java.util.Collections.synchronizedMap {
+    new java.util.HashMap[String, BlockClientListener]
+  }
+
+  def addRequest(blockId: String, listener: BlockClientListener): Unit = {
+    outstandingRequests.put(blockId, listener)
+  }
+
+  def removeRequest(blockId: String): Unit = {
+    outstandingRequests.remove(blockId)
+  }
+
+  override def exceptionCaught(ctx: ChannelHandlerContext, cause: Throwable): Unit = {
+    val errorMsg = s"Exception in connection from ${ctx.channel.remoteAddress}: ${cause.getMessage}"
+    logError(errorMsg, cause)
+
+    // Fire the failure callback for all outstanding blocks
+    outstandingRequests.synchronized {
+      val iter = outstandingRequests.entrySet().iterator()
+      while (iter.hasNext) {
+        val entry = iter.next()
+        entry.getValue.onFetchFailure(entry.getKey, errorMsg)
+      }
+      outstandingRequests.clear()
+    }
+
+    ctx.close()
+  }
+
+  override def channelRead0(ctx: ChannelHandlerContext, in: ByteBuf) {
+    val totalLen = in.readInt()
+    val blockIdLen = in.readInt()
+    val blockIdBytes = new Array[Byte](math.abs(blockIdLen))
+    in.readBytes(blockIdBytes)
+    val blockId = new String(blockIdBytes)
+    val blockSize = totalLen - math.abs(blockIdLen) - 4
+
+    def server = ctx.channel.remoteAddress.toString
+
+    // blockIdLen is negative when it is an error message.
+    if (blockIdLen < 0) {
+      val errorMessageBytes = new Array[Byte](blockSize)
+      in.readBytes(errorMessageBytes)
+      val errorMsg = new String(errorMessageBytes)
+      logTrace(s"Received block $blockId ($blockSize B) with error $errorMsg from $server")
+
+      val listener = outstandingRequests.get(blockId)
+      if (listener == null) {
+        // Ignore callback
+        logWarning(s"Got a response for block $blockId but it is not in our outstanding requests")
+      } else {
+        outstandingRequests.remove(blockId)
+        listener.onFetchFailure(blockId, errorMsg)
+      }
+    } else {
+      logTrace(s"Received block $blockId ($blockSize B) from $server")
+
+      val listener = outstandingRequests.get(blockId)
+      if (listener == null) {
+        // Ignore callback
+        logWarning(s"Got a response for block $blockId but it is not in our outstanding requests")
+      } else {
+        outstandingRequests.remove(blockId)
+        listener.onFetchSuccess(blockId, new ReferenceCountedBuffer(in))
+      }
+    }
+  }
+}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/LazyInitIterator.scala b/core/src/main/scala/org/apache/spark/network/netty/client/LazyInitIterator.scala
new file mode 100644
index 0000000000000..9740ee64d1f2d
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/client/LazyInitIterator.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.client
+
+/**
+ * A simple iterator that lazily initializes the underlying iterator.
+ *
+ * The use case is that sometimes we might have many iterators open at the same time, and each of
+ * the iterator might initialize its own buffer (e.g. decompression buffer, deserialization buffer).
+ * This could lead to too many buffers open. If this iterator is used, we lazily initialize those
+ * buffers.
+ */
+private[spark]
+class LazyInitIterator(createIterator: => Iterator[Any]) extends Iterator[Any] {
+
+  lazy val proxy = createIterator
+
+  override def hasNext: Boolean = {
+    val gotNext = proxy.hasNext
+    if (!gotNext) {
+      close()
+    }
+    gotNext
+  }
+
+  override def next(): Any = proxy.next()
+
+  def close(): Unit = Unit
+}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/ReferenceCountedBuffer.scala b/core/src/main/scala/org/apache/spark/network/netty/client/ReferenceCountedBuffer.scala
new file mode 100644
index 0000000000000..ea1abf5eccc26
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/client/ReferenceCountedBuffer.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.client
+
+import java.io.InputStream
+import java.nio.ByteBuffer
+
+import io.netty.buffer.{ByteBuf, ByteBufInputStream}
+
+
+/**
+ * A buffer abstraction based on Netty's ByteBuf so we don't expose Netty.
+ * This is a Scala value class.
+ *
+ * The buffer's life cycle is NOT managed by the JVM, and thus requiring explicit declaration of
+ * reference by the retain method and release method.
+ */
+private[spark]
+class ReferenceCountedBuffer(val underlying: ByteBuf) extends AnyVal {
+
+  /** Return the nio ByteBuffer view of the underlying buffer. */
+  def byteBuffer(): ByteBuffer = underlying.nioBuffer
+
+  /** Creates a new input stream that starts from the current position of the buffer. */
+  def inputStream(): InputStream = new ByteBufInputStream(underlying)
+
+  /** Increment the reference counter by one. */
+  def retain(): Unit = underlying.retain()
+
+  /** Decrement the reference counter by one and release the buffer if the ref count is 0. */
+  def release(): Unit = underlying.release()
+}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeader.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeader.scala
new file mode 100644
index 0000000000000..162e9cc6828d4
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeader.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.server
+
+/**
+ * Header describing a block. This is used only in the server pipeline.
+ *
+ * [[BlockServerHandler]] creates this, and [[BlockHeaderEncoder]] encodes it.
+ *
+ * @param blockSize length of the block content, excluding the length itself.
+ *                 If positive, this is the header for a block (not part of the header).
+ *                 If negative, this is the header and content for an error message.
+ * @param blockId block id
+ * @param error some error message from reading the block
+ */
+private[server]
+class BlockHeader(val blockSize: Int, val blockId: String, val error: Option[String] = None)
diff --git a/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeaderEncoder.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeaderEncoder.scala
new file mode 100644
index 0000000000000..8e4dda4ef8595
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeaderEncoder.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.server
+
+import io.netty.buffer.ByteBuf
+import io.netty.channel.ChannelHandlerContext
+import io.netty.handler.codec.MessageToByteEncoder
+
+/**
+ * A simple encoder for BlockHeader. See [[BlockServer]] for the server to client protocol.
+ */
+private[server]
+class BlockHeaderEncoder extends MessageToByteEncoder[BlockHeader] {
+  override def encode(ctx: ChannelHandlerContext, msg: BlockHeader, out: ByteBuf): Unit = {
+    // message = message length (4 bytes) + block id length (4 bytes) + block id + block data
+    // message length = block id length (4 bytes) + size of block id + size of block data
+    val blockIdBytes = msg.blockId.getBytes
+    msg.error match {
+      case Some(errorMsg) =>
+        val errorBytes = errorMsg.getBytes
+        out.writeInt(4 + blockIdBytes.length + errorBytes.size)
+        out.writeInt(-blockIdBytes.length)  // use negative block id length to represent errors
+        out.writeBytes(blockIdBytes)  // next is blockId itself
+        out.writeBytes(errorBytes)  // error message
+      case None =>
+        out.writeInt(4 + blockIdBytes.length + msg.blockSize)
+        out.writeInt(blockIdBytes.length)  // First 4 bytes is blockId length
+        out.writeBytes(blockIdBytes)  // next is blockId itself
+        // msg of size blockSize will be written by ServerHandler
+    }
+  }
+}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/server/BlockServer.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockServer.scala
new file mode 100644
index 0000000000000..7b2f9a8d4dfd0
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/server/BlockServer.scala
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.server
+
+import java.net.InetSocketAddress
+
+import io.netty.bootstrap.ServerBootstrap
+import io.netty.buffer.PooledByteBufAllocator
+import io.netty.channel.{ChannelFuture, ChannelInitializer, ChannelOption}
+import io.netty.channel.epoll.{EpollEventLoopGroup, EpollServerSocketChannel}
+import io.netty.channel.nio.NioEventLoopGroup
+import io.netty.channel.oio.OioEventLoopGroup
+import io.netty.channel.socket.SocketChannel
+import io.netty.channel.socket.nio.NioServerSocketChannel
+import io.netty.channel.socket.oio.OioServerSocketChannel
+import io.netty.handler.codec.LineBasedFrameDecoder
+import io.netty.handler.codec.string.StringDecoder
+import io.netty.util.CharsetUtil
+
+import org.apache.spark.{Logging, SparkConf}
+import org.apache.spark.network.netty.NettyConfig
+import org.apache.spark.storage.BlockDataProvider
+import org.apache.spark.util.Utils
+
+
+/**
+ * Server for serving Spark data blocks.
+ * This should be used together with [[org.apache.spark.network.netty.client.BlockFetchingClient]].
+ *
+ * Protocol for requesting blocks (client to server):
+ *   One block id per line, e.g. to request 3 blocks: "block1\nblock2\nblock3\n"
+ *
+ * Protocol for sending blocks (server to client):
+ *   frame-length (4 bytes), block-id-length (4 bytes), block-id, block-data.
+ *
+ *   frame-length should not include the length of itself.
+ *   If block-id-length is negative, then this is an error message rather than block-data. The real
+ *   length is the absolute value of the frame-length.
+ *
+ */
+private[spark]
+class BlockServer(conf: NettyConfig, dataProvider: BlockDataProvider) extends Logging {
+
+  def this(sparkConf: SparkConf, dataProvider: BlockDataProvider) = {
+    this(new NettyConfig(sparkConf), dataProvider)
+  }
+
+  def port: Int = _port
+
+  def hostName: String = _hostName
+
+  private var _port: Int = conf.serverPort
+  private var _hostName: String = ""
+  private var bootstrap: ServerBootstrap = _
+  private var channelFuture: ChannelFuture = _
+
+  init()
+
+  /** Initialize the server. */
+  private def init(): Unit = {
+    bootstrap = new ServerBootstrap
+    val bossThreadFactory = Utils.namedThreadFactory("spark-shuffle-server-boss")
+    val workerThreadFactory = Utils.namedThreadFactory("spark-shuffle-server-worker")
+
+    // Use only one thread to accept connections, and 2 * num_cores for worker.
+    def initNio(): Unit = {
+      val bossGroup = new NioEventLoopGroup(1, bossThreadFactory)
+      val workerGroup = new NioEventLoopGroup(0, workerThreadFactory)
+      workerGroup.setIoRatio(conf.ioRatio)
+      bootstrap.group(bossGroup, workerGroup).channel(classOf[NioServerSocketChannel])
+    }
+    def initOio(): Unit = {
+      val bossGroup = new OioEventLoopGroup(1, bossThreadFactory)
+      val workerGroup = new OioEventLoopGroup(0, workerThreadFactory)
+      bootstrap.group(bossGroup, workerGroup).channel(classOf[OioServerSocketChannel])
+    }
+    def initEpoll(): Unit = {
+      val bossGroup = new EpollEventLoopGroup(1, bossThreadFactory)
+      val workerGroup = new EpollEventLoopGroup(0, workerThreadFactory)
+      workerGroup.setIoRatio(conf.ioRatio)
+      bootstrap.group(bossGroup, workerGroup).channel(classOf[EpollServerSocketChannel])
+    }
+
+    conf.ioMode match {
+      case "nio" => initNio()
+      case "oio" => initOio()
+      case "epoll" => initEpoll()
+      case "auto" =>
+        // For auto mode, first try epoll (only available on Linux), then nio.
+        try {
+          initEpoll()
+        } catch {
+          // TODO: Should we log the throwable? But that always happen on non-Linux systems.
+          // Perhaps the right thing to do is to check whether the system is Linux, and then only
+          // call initEpoll on Linux.
+          case e: Throwable => initNio()
+        }
+    }
+
+    // Use pooled buffers to reduce temporary buffer allocation
+    bootstrap.option(ChannelOption.ALLOCATOR, PooledByteBufAllocator.DEFAULT)
+    bootstrap.childOption(ChannelOption.ALLOCATOR, PooledByteBufAllocator.DEFAULT)
+
+    // Various (advanced) user-configured settings.
+    conf.backLog.foreach { backLog =>
+      bootstrap.option[java.lang.Integer](ChannelOption.SO_BACKLOG, backLog)
+    }
+    conf.receiveBuf.foreach { receiveBuf =>
+      bootstrap.option[java.lang.Integer](ChannelOption.SO_RCVBUF, receiveBuf)
+    }
+    conf.sendBuf.foreach { sendBuf =>
+      bootstrap.option[java.lang.Integer](ChannelOption.SO_SNDBUF, sendBuf)
+    }
+
+    bootstrap.childHandler(new ChannelInitializer[SocketChannel] {
+      override def initChannel(ch: SocketChannel): Unit = {
+        ch.pipeline
+          .addLast("frameDecoder", new LineBasedFrameDecoder(1024))  // max block id length 1024
+          .addLast("stringDecoder", new StringDecoder(CharsetUtil.UTF_8))
+          .addLast("blockHeaderEncoder", new BlockHeaderEncoder)
+          .addLast("handler", new BlockServerHandler(dataProvider))
+      }
+    })
+
+    channelFuture = bootstrap.bind(new InetSocketAddress(_port))
+    channelFuture.sync()
+
+    val addr = channelFuture.channel.localAddress.asInstanceOf[InetSocketAddress]
+    _port = addr.getPort
+    _hostName = addr.getHostName
+  }
+
+  /** Shutdown the server. */
+  def stop(): Unit = {
+    if (channelFuture != null) {
+      channelFuture.channel().close().awaitUninterruptibly()
+      channelFuture = null
+    }
+    if (bootstrap != null && bootstrap.group() != null) {
+      bootstrap.group().shutdownGracefully()
+    }
+    if (bootstrap != null && bootstrap.childGroup() != null) {
+      bootstrap.childGroup().shutdownGracefully()
+    }
+    bootstrap = null
+  }
+}
diff --git a/core/src/main/scala/org/apache/spark/network/netty/FileServerChannelInitializer.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerChannelInitializer.scala
similarity index 58%
rename from core/src/main/scala/org/apache/spark/network/netty/FileServerChannelInitializer.scala
rename to core/src/main/scala/org/apache/spark/network/netty/server/BlockServerChannelInitializer.scala
index aaa2f913d0269..cc70bd0c5c477 100644
--- a/core/src/main/scala/org/apache/spark/network/netty/FileServerChannelInitializer.scala
+++ b/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerChannelInitializer.scala
@@ -15,20 +15,26 @@
  * limitations under the License.
  */
 
-package org.apache.spark.network.netty
+package org.apache.spark.network.netty.server
 
 import io.netty.channel.ChannelInitializer
 import io.netty.channel.socket.SocketChannel
-import io.netty.handler.codec.{DelimiterBasedFrameDecoder, Delimiters}
+import io.netty.handler.codec.LineBasedFrameDecoder
 import io.netty.handler.codec.string.StringDecoder
+import io.netty.util.CharsetUtil
+import org.apache.spark.storage.BlockDataProvider
 
-class FileServerChannelInitializer(pResolver: PathResolver)
+
+/** Channel initializer that sets up the pipeline for the BlockServer. */
+private[netty]
+class BlockServerChannelInitializer(dataProvider: BlockDataProvider)
   extends ChannelInitializer[SocketChannel] {
 
-  override def initChannel(channel: SocketChannel): Unit = {
-    channel.pipeline
-      .addLast("framer", new DelimiterBasedFrameDecoder(8192, Delimiters.lineDelimiter : _*))
-      .addLast("stringDecoder", new StringDecoder)
-      .addLast("handler", new FileServerHandler(pResolver))
+  override def initChannel(ch: SocketChannel): Unit = {
+    ch.pipeline
+      .addLast("frameDecoder", new LineBasedFrameDecoder(1024))  // max block id length 1024
+      .addLast("stringDecoder", new StringDecoder(CharsetUtil.UTF_8))
+      .addLast("blockHeaderEncoder", new BlockHeaderEncoder)
+      .addLast("handler", new BlockServerHandler(dataProvider))
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerHandler.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerHandler.scala
new file mode 100644
index 0000000000000..40dd5e5d1a2ac
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerHandler.scala
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.server
+
+import java.io.FileInputStream
+import java.nio.ByteBuffer
+import java.nio.channels.FileChannel
+
+import io.netty.buffer.Unpooled
+import io.netty.channel._
+
+import org.apache.spark.Logging
+import org.apache.spark.storage.{FileSegment, BlockDataProvider}
+
+
+/**
+ * A handler that processes requests from clients and writes block data back.
+ *
+ * The messages should have been processed by a LineBasedFrameDecoder and a StringDecoder first
+ * so channelRead0 is called once per line (i.e. per block id).
+ */
+private[server]
+class BlockServerHandler(dataProvider: BlockDataProvider)
+  extends SimpleChannelInboundHandler[String] with Logging {
+
+  override def exceptionCaught(ctx: ChannelHandlerContext, cause: Throwable): Unit = {
+    logError(s"Exception in connection from ${ctx.channel.remoteAddress}", cause)
+    ctx.close()
+  }
+
+  override def channelRead0(ctx: ChannelHandlerContext, blockId: String): Unit = {
+    def client = ctx.channel.remoteAddress.toString
+
+    // A helper function to send error message back to the client.
+    def respondWithError(error: String): Unit = {
+      ctx.writeAndFlush(new BlockHeader(-1, blockId, Some(error))).addListener(
+        new ChannelFutureListener {
+          override def operationComplete(future: ChannelFuture) {
+            if (!future.isSuccess) {
+              // TODO: Maybe log the success case as well.
+              logError(s"Error sending error back to $client", future.cause)
+              ctx.close()
+            }
+          }
+        }
+      )
+    }
+
+    def writeFileSegment(segment: FileSegment): Unit = {
+      // Send error message back if the block is too large. Even though we are capable of sending
+      // large (2G+) blocks, the receiving end cannot handle it so let's fail fast.
+      // Once we fixed the receiving end to be able to process large blocks, this should be removed.
+      // Also make sure we update BlockHeaderEncoder to support length > 2G.
+
+      // See [[BlockHeaderEncoder]] for the way length is encoded.
+      if (segment.length + blockId.length + 4 > Int.MaxValue) {
+        respondWithError(s"Block $blockId size ($segment.length) greater than 2G")
+        return
+      }
+
+      var fileChannel: FileChannel = null
+      try {
+        fileChannel = new FileInputStream(segment.file).getChannel
+      } catch {
+        case e: Exception =>
+          logError(
+            s"Error opening channel for $blockId in ${segment.file} for request from $client", e)
+          respondWithError(e.getMessage)
+      }
+
+      // Found the block. Send it back.
+      if (fileChannel != null) {
+        // Write the header and block data. In the case of failures, the listener on the block data
+        // write should close the connection.
+        ctx.write(new BlockHeader(segment.length.toInt, blockId))
+
+        val region = new DefaultFileRegion(fileChannel, segment.offset, segment.length)
+        ctx.writeAndFlush(region).addListener(new ChannelFutureListener {
+          override def operationComplete(future: ChannelFuture) {
+            if (future.isSuccess) {
+              logTrace(s"Sent block $blockId (${segment.length} B) back to $client")
+            } else {
+              logError(s"Error sending block $blockId to $client; closing connection", future.cause)
+              ctx.close()
+            }
+          }
+        })
+      }
+    }
+
+    def writeByteBuffer(buf: ByteBuffer): Unit = {
+      ctx.write(new BlockHeader(buf.remaining, blockId))
+      ctx.writeAndFlush(Unpooled.wrappedBuffer(buf)).addListener(new ChannelFutureListener {
+        override def operationComplete(future: ChannelFuture) {
+          if (future.isSuccess) {
+            logTrace(s"Sent block $blockId (${buf.remaining} B) back to $client")
+          } else {
+            logError(s"Error sending block $blockId to $client; closing connection", future.cause)
+            ctx.close()
+          }
+        }
+      })
+    }
+
+    logTrace(s"Received request from $client to fetch block $blockId")
+
+    var blockData: Either[FileSegment, ByteBuffer] = null
+
+    // First make sure we can find the block. If not, send error back to the user.
+    try {
+      blockData = dataProvider.getBlockData(blockId)
+    } catch {
+      case e: Exception =>
+        logError(s"Error opening block $blockId for request from $client", e)
+        respondWithError(e.getMessage)
+        return
+    }
+
+    blockData match {
+      case Left(segment) => writeFileSegment(segment)
+      case Right(buf) => writeByteBuffer(buf)
+    }
+
+  }  // end of channelRead0
+}
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockDataProvider.scala b/core/src/main/scala/org/apache/spark/storage/BlockDataProvider.scala
new file mode 100644
index 0000000000000..5b6d086630834
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/storage/BlockDataProvider.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.storage
+
+import java.nio.ByteBuffer
+
+
+/**
+ * An interface for providing data for blocks.
+ *
+ * getBlockData returns either a FileSegment (for zero-copy send), or a ByteBuffer.
+ *
+ * Aside from unit tests, [[BlockManager]] is the main class that implements this.
+ */
+private[spark] trait BlockDataProvider {
+  def getBlockData(blockId: String): Either[FileSegment, ByteBuffer]
+}
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala
index 5f44f5f3197fd..ca60ec78b62ee 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala
@@ -18,19 +18,17 @@
 package org.apache.spark.storage
 
 import java.util.concurrent.LinkedBlockingQueue
+import org.apache.spark.network.netty.client.{BlockClientListener, LazyInitIterator, ReferenceCountedBuffer}
 
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.HashSet
 import scala.collection.mutable.Queue
 import scala.util.{Failure, Success}
 
-import io.netty.buffer.ByteBuf
-
 import org.apache.spark.{Logging, SparkException}
 import org.apache.spark.executor.ShuffleReadMetrics
 import org.apache.spark.network.BufferMessage
 import org.apache.spark.network.ConnectionManagerId
-import org.apache.spark.network.netty.ShuffleCopier
 import org.apache.spark.serializer.Serializer
 import org.apache.spark.util.Utils
 
@@ -54,18 +52,28 @@ trait BlockFetcherIterator extends Iterator[(BlockId, Option[Iterator[Any]])] wi
 private[storage]
 object BlockFetcherIterator {
 
-  // A request to fetch one or more blocks, complete with their sizes
+  /**
+   * A request to fetch blocks from a remote BlockManager.
+   * @param address remote BlockManager to fetch from.
+   * @param blocks Sequence of tuple, where the first element is the block id,
+   *               and the second element is the estimated size, used to calculate bytesInFlight.
+   */
   class FetchRequest(val address: BlockManagerId, val blocks: Seq[(BlockId, Long)]) {
     val size = blocks.map(_._2).sum
   }
 
-  // A result of a fetch. Includes the block ID, size in bytes, and a function to deserialize
-  // the block (since we want all deserializaton to happen in the calling thread); can also
-  // represent a fetch failure if size == -1.
+  /**
+   * Result of a fetch from a remote block. A failure is represented as size == -1.
+   * @param blockId block id
+   * @param size estimated size of the block, used to calculate bytesInFlight.
+   *             Note that this is NOT the exact bytes.
+   * @param deserialize closure to return the result in the form of an Iterator.
+   */
   class FetchResult(val blockId: BlockId, val size: Long, val deserialize: () => Iterator[Any]) {
     def failed: Boolean = size == -1
   }
 
+  // TODO: Refactor this whole thing to make code more reusable.
   class BasicBlockFetcherIterator(
       private val blockManager: BlockManager,
       val blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])],
@@ -95,10 +103,10 @@ object BlockFetcherIterator {
 
     // Queue of fetch requests to issue; we'll pull requests off this gradually to make sure that
     // the number of bytes in flight is limited to maxBytesInFlight
-    private val fetchRequests = new Queue[FetchRequest]
+    protected val fetchRequests = new Queue[FetchRequest]
 
     // Current bytes in flight from our requests
-    private var bytesInFlight = 0L
+    protected var bytesInFlight = 0L
 
     protected def sendRequest(req: FetchRequest) {
       logDebug("Sending request for %d blocks (%s) from %s".format(
@@ -262,77 +270,58 @@ object BlockFetcherIterator {
       readMetrics: ShuffleReadMetrics)
     extends BasicBlockFetcherIterator(blockManager, blocksByAddress, serializer, readMetrics) {
 
-    import blockManager._
+    override protected def sendRequest(req: FetchRequest) {
+      logDebug("Sending request for %d blocks (%s) from %s".format(
+        req.blocks.size, Utils.bytesToString(req.size), req.address.hostPort))
+      val cmId = new ConnectionManagerId(req.address.host, req.address.port)
 
-    val fetchRequestsSync = new LinkedBlockingQueue[FetchRequest]
+      bytesInFlight += req.size
+      val sizeMap = req.blocks.toMap // so we can look up the size of each blockID
+
+      // This could throw a TimeoutException. In that case we will just retry the task.
+      val client = blockManager.nettyBlockClientFactory.createClient(
+        cmId.host, req.address.nettyPort)
+      val blocks = req.blocks.map(_._1.toString)
+
+      client.fetchBlocks(
+        blocks,
+        new BlockClientListener {
+          override def onFetchFailure(blockId: String, errorMsg: String): Unit = {
+            logError(s"Could not get block(s) from $cmId with error: $errorMsg")
+            for ((blockId, size) <- req.blocks) {
+              results.put(new FetchResult(blockId, -1, null))
+            }
+          }
 
-    private def startCopiers(numCopiers: Int): List[_ <: Thread] = {
-      (for ( i <- Range(0,numCopiers) ) yield {
-        val copier = new Thread {
-          override def run(){
-            try {
-              while(!isInterrupted && !fetchRequestsSync.isEmpty) {
-                sendRequest(fetchRequestsSync.take())
+          override def onFetchSuccess(blockId: String, data: ReferenceCountedBuffer): Unit = {
+            // Increment the reference count so the buffer won't be recycled.
+            // TODO: This could result in memory leaks when the task is stopped due to exception
+            // before the iterator is exhausted.
+            data.retain()
+            val buf = data.byteBuffer()
+            val blockSize = buf.remaining()
+            val bid = BlockId(blockId)
+
+            // TODO: remove code duplication between here and BlockManager.dataDeserialization.
+            results.put(new FetchResult(bid, sizeMap(bid), () => {
+              def createIterator: Iterator[Any] = {
+                val stream = blockManager.wrapForCompression(bid, data.inputStream())
+                serializer.newInstance().deserializeStream(stream).asIterator
               }
-            } catch {
-              case x: InterruptedException => logInfo("Copier Interrupted")
-              // case _ => throw new SparkException("Exception Throw in Shuffle Copier")
+              new LazyInitIterator(createIterator) {
+                // Release the buffer when we are done traversing it.
+                override def close(): Unit = data.release()
+              }
+            }))
+
+            readMetrics.synchronized {
+              readMetrics.remoteBytesRead += blockSize
+              readMetrics.remoteBlocksFetched += 1
             }
+            logDebug("Got remote block " + blockId + " after " + Utils.getUsedTimeMs(startTime))
           }
         }
-        copier.start
-        copier
-      }).toList
-    }
-
-    // keep this to interrupt the threads when necessary
-    private def stopCopiers() {
-      for (copier <- copiers) {
-        copier.interrupt()
-      }
-    }
-
-    override protected def sendRequest(req: FetchRequest) {
-
-      def putResult(blockId: BlockId, blockSize: Long, blockData: ByteBuf) {
-        val fetchResult = new FetchResult(blockId, blockSize,
-          () => dataDeserialize(blockId, blockData.nioBuffer, serializer))
-        results.put(fetchResult)
-      }
-
-      logDebug("Sending request for %d blocks (%s) from %s".format(
-        req.blocks.size, Utils.bytesToString(req.size), req.address.host))
-      val cmId = new ConnectionManagerId(req.address.host, req.address.nettyPort)
-      val cpier = new ShuffleCopier(blockManager.conf)
-      cpier.getBlocks(cmId, req.blocks, putResult)
-      logDebug("Sent request for remote blocks " + req.blocks + " from " + req.address.host )
-    }
-
-    private var copiers: List[_ <: Thread] = null
-
-    override def initialize() {
-      // Split Local Remote Blocks and set numBlocksToFetch
-      val remoteRequests = splitLocalRemoteBlocks()
-      // Add the remote requests into our queue in a random order
-      for (request <- Utils.randomize(remoteRequests)) {
-        fetchRequestsSync.put(request)
-      }
-
-      copiers = startCopiers(conf.getInt("spark.shuffle.copier.threads", 6))
-      logInfo("Started " + fetchRequestsSync.size + " remote fetches in " +
-        Utils.getUsedTimeMs(startTime))
-
-      // Get Local Blocks
-      startTime = System.currentTimeMillis
-      getLocalBlocks()
-      logDebug("Got local blocks in " + Utils.getUsedTimeMs(startTime) + " ms")
-    }
-
-    override def next(): (BlockId, Option[Iterator[Any]]) = {
-      resultsGotten += 1
-      val result = results.take()
-      // If all the results has been retrieved, copiers will exit automatically
-      (result.blockId, if (result.failed) None else Some(result.deserialize()))
+      )
     }
   }
   // End of NettyBlockFetcherIterator
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
index e4c3d58905e7f..12a92d44f4c36 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
@@ -25,17 +25,20 @@ import scala.concurrent.{Await, Future}
 import scala.concurrent.duration._
 import scala.util.Random
 
-import akka.actor.{ActorSystem, Cancellable, Props}
+import akka.actor.{ActorSystem, Props}
 import sun.nio.ch.DirectBuffer
 
 import org.apache.spark._
 import org.apache.spark.executor._
 import org.apache.spark.io.CompressionCodec
 import org.apache.spark.network._
+import org.apache.spark.network.netty.client.BlockFetchingClientFactory
+import org.apache.spark.network.netty.server.BlockServer
 import org.apache.spark.serializer.Serializer
 import org.apache.spark.shuffle.ShuffleManager
 import org.apache.spark.util._
 
+
 private[spark] sealed trait BlockValues
 private[spark] case class ByteBufferValues(buffer: ByteBuffer) extends BlockValues
 private[spark] case class IteratorValues(iterator: Iterator[Any]) extends BlockValues
@@ -60,12 +63,11 @@ private[spark] class BlockManager(
     securityManager: SecurityManager,
     mapOutputTracker: MapOutputTracker,
     shuffleManager: ShuffleManager)
-  extends Logging {
+  extends BlockDataProvider with Logging {
 
   private val port = conf.getInt("spark.blockManager.port", 0)
   val shuffleBlockManager = new ShuffleBlockManager(this, shuffleManager)
-  val diskBlockManager = new DiskBlockManager(shuffleBlockManager,
-    conf.get("spark.local.dir", System.getProperty("java.io.tmpdir")))
+  val diskBlockManager = new DiskBlockManager(shuffleBlockManager, conf)
   val connectionManager =
     new ConnectionManager(port, conf, securityManager, "Connection manager for block manager")
 
@@ -88,13 +90,25 @@ private[spark] class BlockManager(
     new TachyonStore(this, tachyonBlockManager)
   }
 
+  private val useNetty = conf.getBoolean("spark.shuffle.use.netty", false)
+
   // If we use Netty for shuffle, start a new Netty-based shuffle sender service.
-  private val nettyPort: Int = {
-    val useNetty = conf.getBoolean("spark.shuffle.use.netty", false)
-    val nettyPortConfig = conf.getInt("spark.shuffle.sender.port", 0)
-    if (useNetty) diskBlockManager.startShuffleBlockSender(nettyPortConfig) else 0
+  private[storage] val nettyBlockClientFactory: BlockFetchingClientFactory = {
+    if (useNetty) new BlockFetchingClientFactory(conf) else null
   }
 
+  private val nettyBlockServer: BlockServer = {
+    if (useNetty) {
+      val server = new BlockServer(conf, this)
+      logInfo(s"Created NettyBlockServer binding to port: ${server.port}")
+      server
+    } else {
+      null
+    }
+  }
+
+  private val nettyPort: Int = if (useNetty) nettyBlockServer.port else 0
+
   val blockManagerId = BlockManagerId(
     executorId, connectionManager.id.host, connectionManager.id.port, nettyPort)
 
@@ -219,6 +233,20 @@ private[spark] class BlockManager(
     }
   }
 
+  override def getBlockData(blockId: String): Either[FileSegment, ByteBuffer] = {
+    val bid = BlockId(blockId)
+    if (bid.isShuffle) {
+      Left(diskBlockManager.getBlockLocation(bid))
+    } else {
+      val blockBytesOpt = doGetLocal(bid, asBlockResult = false).asInstanceOf[Option[ByteBuffer]]
+      if (blockBytesOpt.isDefined) {
+        Right(blockBytesOpt.get)
+      } else {
+        throw new BlockNotFoundException(blockId)
+      }
+    }
+  }
+
   /**
    * Get the BlockStatus for the block identified by the given ID, if it exists.
    * NOTE: This is mainly for testing, and it doesn't fetch information from Tachyon.
@@ -1064,6 +1092,14 @@ private[spark] class BlockManager(
     connectionManager.stop()
     shuffleBlockManager.stop()
     diskBlockManager.stop()
+
+    if (nettyBlockClientFactory != null) {
+      nettyBlockClientFactory.stop()
+    }
+    if (nettyBlockServer != null) {
+      nettyBlockServer.stop()
+    }
+
     actorSystem.stop(slaveActor)
     blockInfo.clear()
     memoryStore.clear()
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockNotFoundException.scala b/core/src/main/scala/org/apache/spark/storage/BlockNotFoundException.scala
new file mode 100644
index 0000000000000..9ef453605f4f1
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/storage/BlockNotFoundException.scala
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.storage
+
+
+class BlockNotFoundException(blockId: String) extends Exception(s"Block $blockId not found")
diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
index 4d66ccea211fa..ec022ce9c048a 100644
--- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
@@ -21,9 +21,9 @@ import java.io.File
 import java.text.SimpleDateFormat
 import java.util.{Date, Random, UUID}
 
-import org.apache.spark.{SparkEnv, Logging}
+import org.apache.spark.{SparkConf, SparkEnv, Logging}
 import org.apache.spark.executor.ExecutorExitCode
-import org.apache.spark.network.netty.{PathResolver, ShuffleSender}
+import org.apache.spark.network.netty.PathResolver
 import org.apache.spark.util.Utils
 import org.apache.spark.shuffle.sort.SortShuffleManager
 
@@ -33,9 +33,10 @@ import org.apache.spark.shuffle.sort.SortShuffleManager
  * However, it is also possible to have a block map to only a segment of a file, by calling
  * mapBlockToFileSegment().
  *
- * @param rootDirs The directories to use for storing block files. Data will be hashed among these.
+ * Block files are hashed among the directories listed in spark.local.dir (or in
+ * SPARK_LOCAL_DIRS, if it's set).
  */
-private[spark] class DiskBlockManager(shuffleBlockManager: ShuffleBlockManager, rootDirs: String)
+private[spark] class DiskBlockManager(shuffleBlockManager: ShuffleBlockManager, conf: SparkConf)
   extends PathResolver with Logging {
 
   private val MAX_DIR_CREATION_ATTEMPTS: Int = 10
@@ -46,13 +47,12 @@ private[spark] class DiskBlockManager(shuffleBlockManager: ShuffleBlockManager,
   /* Create one local directory for each path mentioned in spark.local.dir; then, inside this
    * directory, create multiple subdirectories that we will hash files into, in order to avoid
    * having really large inodes at the top level. */
-  val localDirs: Array[File] = createLocalDirs()
+  val localDirs: Array[File] = createLocalDirs(conf)
   if (localDirs.isEmpty) {
     logError("Failed to create any local dir.")
     System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR)
   }
   private val subDirs = Array.fill(localDirs.length)(new Array[File](subDirsPerLocalDir))
-  private var shuffleSender : ShuffleSender = null
 
   addShutdownHook()
 
@@ -131,10 +131,9 @@ private[spark] class DiskBlockManager(shuffleBlockManager: ShuffleBlockManager,
     (blockId, getFile(blockId))
   }
 
-  private def createLocalDirs(): Array[File] = {
-    logDebug(s"Creating local directories at root dirs '$rootDirs'")
+  private def createLocalDirs(conf: SparkConf): Array[File] = {
     val dateFormat = new SimpleDateFormat("yyyyMMddHHmmss")
-    rootDirs.split(",").flatMap { rootDir =>
+    Utils.getOrCreateLocalRootDirs(conf).flatMap { rootDir =>
       var foundLocalDir = false
       var localDir: File = null
       var localDirId: String = null
@@ -186,15 +185,5 @@ private[spark] class DiskBlockManager(shuffleBlockManager: ShuffleBlockManager,
         }
       }
     }
-
-    if (shuffleSender != null) {
-      shuffleSender.stop()
-    }
-  }
-
-  private[storage] def startShuffleBlockSender(port: Int): Int = {
-    shuffleSender = new ShuffleSender(port, this)
-    logInfo(s"Created ShuffleSender binding to port: ${shuffleSender.port}")
-    shuffleSender.port
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/util/FileLogger.scala b/core/src/main/scala/org/apache/spark/util/FileLogger.scala
index 2e8fbf5a91ee7..ad8b79af877d8 100644
--- a/core/src/main/scala/org/apache/spark/util/FileLogger.scala
+++ b/core/src/main/scala/org/apache/spark/util/FileLogger.scala
@@ -52,7 +52,20 @@ private[spark] class FileLogger(
     override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
   }
 
-  private val fileSystem = Utils.getHadoopFileSystem(logDir)
+  /**
+   * To avoid effects of FileSystem#close or FileSystem.closeAll called from other modules,
+   * create unique FileSystem instance only for FileLogger
+   */
+  private val fileSystem = {
+    val conf = SparkHadoopUtil.get.newConfiguration()
+    val logUri = new URI(logDir)
+    val scheme = logUri.getScheme
+    if (scheme == "hdfs") {
+      conf.setBoolean("fs.hdfs.impl.disable.cache", true)
+    }
+    FileSystem.get(logUri, conf)
+  }
+
   var fileIndex = 0
 
   // Only used if compression is enabled
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 019f68b160894..d6d74ce269219 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -449,12 +449,71 @@ private[spark] object Utils extends Logging {
   }
 
   /**
-   * Get a temporary directory using Spark's spark.local.dir property, if set. This will always
-   * return a single directory, even though the spark.local.dir property might be a list of
-   * multiple paths.
+   * Get the path of a temporary directory.  Spark's local directories can be configured through
+   * multiple settings, which are used with the following precedence:
+   *
+   *   - If called from inside of a YARN container, this will return a directory chosen by YARN.
+   *   - If the SPARK_LOCAL_DIRS environment variable is set, this will return a directory from it.
+   *   - Otherwise, if the spark.local.dir is set, this will return a directory from it.
+   *   - Otherwise, this will return java.io.tmpdir.
+   *
+   * Some of these configuration options might be lists of multiple paths, but this method will
+   * always return a single directory.
    */
   def getLocalDir(conf: SparkConf): String = {
-    conf.get("spark.local.dir",  System.getProperty("java.io.tmpdir")).split(',')(0)
+    getOrCreateLocalRootDirs(conf)(0)
+  }
+
+  private[spark] def isRunningInYarnContainer(conf: SparkConf): Boolean = {
+    // These environment variables are set by YARN.
+    // For Hadoop 0.23.X, we check for YARN_LOCAL_DIRS (we use this below in getYarnLocalDirs())
+    // For Hadoop 2.X, we check for CONTAINER_ID.
+    conf.getenv("CONTAINER_ID") != null || conf.getenv("YARN_LOCAL_DIRS") != null
+  }
+
+  /**
+   * Gets or creates the directories listed in spark.local.dir or SPARK_LOCAL_DIRS,
+   * and returns only the directories that exist / could be created.
+   *
+   * If no directories could be created, this will return an empty list.
+   */
+  private[spark] def getOrCreateLocalRootDirs(conf: SparkConf): Array[String] = {
+    val confValue = if (isRunningInYarnContainer(conf)) {
+      // If we are in yarn mode, systems can have different disk layouts so we must set it
+      // to what Yarn on this system said was available.
+      getYarnLocalDirs(conf)
+    } else {
+      Option(conf.getenv("SPARK_LOCAL_DIRS")).getOrElse(
+        conf.get("spark.local.dir", System.getProperty("java.io.tmpdir")))
+    }
+    val rootDirs = confValue.split(',')
+    logDebug(s"Getting/creating local root dirs at '$confValue'")
+
+    rootDirs.flatMap { rootDir =>
+      val localDir: File = new File(rootDir)
+      val foundLocalDir = localDir.exists || localDir.mkdirs()
+      if (!foundLocalDir) {
+        logError(s"Failed to create local root dir in $rootDir.  Ignoring this directory.")
+        None
+      } else {
+        Some(rootDir)
+      }
+    }
+  }
+
+  /** Get the Yarn approved local directories. */
+  private def getYarnLocalDirs(conf: SparkConf): String = {
+    // Hadoop 0.23 and 2.x have different Environment variable names for the
+    // local dirs, so lets check both. We assume one of the 2 is set.
+    // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X
+    val localDirs = Option(conf.getenv("YARN_LOCAL_DIRS"))
+      .getOrElse(Option(conf.getenv("LOCAL_DIRS"))
+      .getOrElse(""))
+
+    if (localDirs.isEmpty) {
+      throw new Exception("Yarn Local dirs can't be empty")
+    }
+    localDirs
   }
 
   /**
diff --git a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala
index 17c64455b2429..978a6ded80829 100644
--- a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala
+++ b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala
@@ -17,10 +17,12 @@
 
 package org.apache.spark.broadcast
 
-import org.apache.spark.storage.{BroadcastBlockId, _}
-import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkException}
 import org.scalatest.FunSuite
 
+import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkException}
+import org.apache.spark.storage._
+
+
 class BroadcastSuite extends FunSuite with LocalSparkContext {
 
   private val httpConf = broadcastConf("HttpBroadcastFactory")
@@ -124,12 +126,10 @@ class BroadcastSuite extends FunSuite with LocalSparkContext {
   private def testUnpersistHttpBroadcast(distributed: Boolean, removeFromDriver: Boolean) {
     val numSlaves = if (distributed) 2 else 0
 
-    def getBlockIds(id: Long) = Seq[BroadcastBlockId](BroadcastBlockId(id))
-
     // Verify that the broadcast file is created, and blocks are persisted only on the driver
-    def afterCreation(blockIds: Seq[BroadcastBlockId], bmm: BlockManagerMaster) {
-      assert(blockIds.size === 1)
-      val statuses = bmm.getBlockStatus(blockIds.head, askSlaves = true)
+    def afterCreation(broadcastId: Long, bmm: BlockManagerMaster) {
+      val blockId = BroadcastBlockId(broadcastId)
+      val statuses = bmm.getBlockStatus(blockId, askSlaves = true)
       assert(statuses.size === 1)
       statuses.head match { case (bm, status) =>
         assert(bm.executorId === "<driver>", "Block should only be on the driver")
@@ -139,14 +139,14 @@ class BroadcastSuite extends FunSuite with LocalSparkContext {
       }
       if (distributed) {
         // this file is only generated in distributed mode
-        assert(HttpBroadcast.getFile(blockIds.head.broadcastId).exists, "Broadcast file not found!")
+        assert(HttpBroadcast.getFile(blockId.broadcastId).exists, "Broadcast file not found!")
       }
     }
 
     // Verify that blocks are persisted in both the executors and the driver
-    def afterUsingBroadcast(blockIds: Seq[BroadcastBlockId], bmm: BlockManagerMaster) {
-      assert(blockIds.size === 1)
-      val statuses = bmm.getBlockStatus(blockIds.head, askSlaves = true)
+    def afterUsingBroadcast(broadcastId: Long, bmm: BlockManagerMaster) {
+      val blockId = BroadcastBlockId(broadcastId)
+      val statuses = bmm.getBlockStatus(blockId, askSlaves = true)
       assert(statuses.size === numSlaves + 1)
       statuses.foreach { case (_, status) =>
         assert(status.storageLevel === StorageLevel.MEMORY_AND_DISK)
@@ -157,21 +157,21 @@ class BroadcastSuite extends FunSuite with LocalSparkContext {
 
     // Verify that blocks are unpersisted on all executors, and on all nodes if removeFromDriver
     // is true. In the latter case, also verify that the broadcast file is deleted on the driver.
-    def afterUnpersist(blockIds: Seq[BroadcastBlockId], bmm: BlockManagerMaster) {
-      assert(blockIds.size === 1)
-      val statuses = bmm.getBlockStatus(blockIds.head, askSlaves = true)
+    def afterUnpersist(broadcastId: Long, bmm: BlockManagerMaster) {
+      val blockId = BroadcastBlockId(broadcastId)
+      val statuses = bmm.getBlockStatus(blockId, askSlaves = true)
       val expectedNumBlocks = if (removeFromDriver) 0 else 1
       val possiblyNot = if (removeFromDriver) "" else " not"
       assert(statuses.size === expectedNumBlocks,
         "Block should%s be unpersisted on the driver".format(possiblyNot))
       if (distributed && removeFromDriver) {
         // this file is only generated in distributed mode
-        assert(!HttpBroadcast.getFile(blockIds.head.broadcastId).exists,
+        assert(!HttpBroadcast.getFile(blockId.broadcastId).exists,
           "Broadcast file should%s be deleted".format(possiblyNot))
       }
     }
 
-    testUnpersistBroadcast(distributed, numSlaves, httpConf, getBlockIds, afterCreation,
+    testUnpersistBroadcast(distributed, numSlaves, httpConf, afterCreation,
       afterUsingBroadcast, afterUnpersist, removeFromDriver)
   }
 
@@ -185,67 +185,51 @@ class BroadcastSuite extends FunSuite with LocalSparkContext {
   private def testUnpersistTorrentBroadcast(distributed: Boolean, removeFromDriver: Boolean) {
     val numSlaves = if (distributed) 2 else 0
 
-    def getBlockIds(id: Long) = {
-      val broadcastBlockId = BroadcastBlockId(id)
-      val metaBlockId = BroadcastBlockId(id, "meta")
-      // Assume broadcast value is small enough to fit into 1 piece
-      val pieceBlockId = BroadcastBlockId(id, "piece0")
-      if (distributed) {
-        // the metadata and piece blocks are generated only in distributed mode
-        Seq[BroadcastBlockId](broadcastBlockId, metaBlockId, pieceBlockId)
-      } else {
-        Seq[BroadcastBlockId](broadcastBlockId)
-      }
+    // Verify that blocks are persisted only on the driver
+    def afterCreation(broadcastId: Long, bmm: BlockManagerMaster) {
+      var blockId = BroadcastBlockId(broadcastId)
+      var statuses = bmm.getBlockStatus(blockId, askSlaves = true)
+      assert(statuses.size === 1)
+
+      blockId = BroadcastBlockId(broadcastId, "piece0")
+      statuses = bmm.getBlockStatus(blockId, askSlaves = true)
+      assert(statuses.size === (if (distributed) 1 else 0))
     }
 
-    // Verify that blocks are persisted only on the driver
-    def afterCreation(blockIds: Seq[BroadcastBlockId], bmm: BlockManagerMaster) {
-      blockIds.foreach { blockId =>
-        val statuses = bmm.getBlockStatus(blockIds.head, askSlaves = true)
+    // Verify that blocks are persisted in both the executors and the driver
+    def afterUsingBroadcast(broadcastId: Long, bmm: BlockManagerMaster) {
+      var blockId = BroadcastBlockId(broadcastId)
+      var statuses = bmm.getBlockStatus(blockId, askSlaves = true)
+      if (distributed) {
+        assert(statuses.size === numSlaves + 1)
+      } else {
         assert(statuses.size === 1)
-        statuses.head match { case (bm, status) =>
-          assert(bm.executorId === "<driver>", "Block should only be on the driver")
-          assert(status.storageLevel === StorageLevel.MEMORY_AND_DISK)
-          assert(status.memSize > 0, "Block should be in memory store on the driver")
-          assert(status.diskSize === 0, "Block should not be in disk store on the driver")
-        }
       }
-    }
 
-    // Verify that blocks are persisted in both the executors and the driver
-    def afterUsingBroadcast(blockIds: Seq[BroadcastBlockId], bmm: BlockManagerMaster) {
-      blockIds.foreach { blockId =>
-        val statuses = bmm.getBlockStatus(blockId, askSlaves = true)
-        if (blockId.field == "meta") {
-          // Meta data is only on the driver
-          assert(statuses.size === 1)
-          statuses.head match { case (bm, _) => assert(bm.executorId === "<driver>") }
-        } else {
-          // Other blocks are on both the executors and the driver
-          assert(statuses.size === numSlaves + 1,
-            blockId + " has " + statuses.size + " statuses: " + statuses.mkString(","))
-          statuses.foreach { case (_, status) =>
-            assert(status.storageLevel === StorageLevel.MEMORY_AND_DISK)
-            assert(status.memSize > 0, "Block should be in memory store")
-            assert(status.diskSize === 0, "Block should not be in disk store")
-          }
-        }
+      blockId = BroadcastBlockId(broadcastId, "piece0")
+      statuses = bmm.getBlockStatus(blockId, askSlaves = true)
+      if (distributed) {
+        assert(statuses.size === numSlaves + 1)
+      } else {
+        assert(statuses.size === 0)
       }
     }
 
     // Verify that blocks are unpersisted on all executors, and on all nodes if removeFromDriver
     // is true.
-    def afterUnpersist(blockIds: Seq[BroadcastBlockId], bmm: BlockManagerMaster) {
-      val expectedNumBlocks = if (removeFromDriver) 0 else 1
-      val possiblyNot = if (removeFromDriver) "" else " not"
-      blockIds.foreach { blockId =>
-        val statuses = bmm.getBlockStatus(blockId, askSlaves = true)
-        assert(statuses.size === expectedNumBlocks,
-          "Block should%s be unpersisted on the driver".format(possiblyNot))
-      }
+    def afterUnpersist(broadcastId: Long, bmm: BlockManagerMaster) {
+      var blockId = BroadcastBlockId(broadcastId)
+      var expectedNumBlocks = if (removeFromDriver) 0 else 1
+      var statuses = bmm.getBlockStatus(blockId, askSlaves = true)
+      assert(statuses.size === expectedNumBlocks)
+
+      blockId = BroadcastBlockId(broadcastId, "piece0")
+      expectedNumBlocks = if (removeFromDriver || !distributed) 0 else 1
+      statuses = bmm.getBlockStatus(blockId, askSlaves = true)
+      assert(statuses.size === expectedNumBlocks)
     }
 
-    testUnpersistBroadcast(distributed, numSlaves,  torrentConf, getBlockIds, afterCreation,
+    testUnpersistBroadcast(distributed, numSlaves,  torrentConf, afterCreation,
       afterUsingBroadcast, afterUnpersist, removeFromDriver)
   }
 
@@ -262,10 +246,9 @@ class BroadcastSuite extends FunSuite with LocalSparkContext {
       distributed: Boolean,
       numSlaves: Int,  // used only when distributed = true
       broadcastConf: SparkConf,
-      getBlockIds: Long => Seq[BroadcastBlockId],
-      afterCreation: (Seq[BroadcastBlockId], BlockManagerMaster) => Unit,
-      afterUsingBroadcast: (Seq[BroadcastBlockId], BlockManagerMaster) => Unit,
-      afterUnpersist: (Seq[BroadcastBlockId], BlockManagerMaster) => Unit,
+      afterCreation: (Long, BlockManagerMaster) => Unit,
+      afterUsingBroadcast: (Long, BlockManagerMaster) => Unit,
+      afterUnpersist: (Long, BlockManagerMaster) => Unit,
       removeFromDriver: Boolean) {
 
     sc = if (distributed) {
@@ -278,15 +261,14 @@ class BroadcastSuite extends FunSuite with LocalSparkContext {
 
     // Create broadcast variable
     val broadcast = sc.broadcast(list)
-    val blocks = getBlockIds(broadcast.id)
-    afterCreation(blocks, blockManagerMaster)
+    afterCreation(broadcast.id, blockManagerMaster)
 
     // Use broadcast variable on all executors
     val partitions = 10
     assert(partitions > numSlaves)
     val results = sc.parallelize(1 to partitions, partitions).map(x => (x, broadcast.value.sum))
     assert(results.collect().toSet === (1 to partitions).map(x => (x, list.sum)).toSet)
-    afterUsingBroadcast(blocks, blockManagerMaster)
+    afterUsingBroadcast(broadcast.id, blockManagerMaster)
 
     // Unpersist broadcast
     if (removeFromDriver) {
@@ -294,7 +276,7 @@ class BroadcastSuite extends FunSuite with LocalSparkContext {
     } else {
       broadcast.unpersist(blocking = true)
     }
-    afterUnpersist(blocks, blockManagerMaster)
+    afterUnpersist(broadcast.id, blockManagerMaster)
 
     // If the broadcast is removed from driver, all subsequent uses of the broadcast variable
     // should throw SparkExceptions. Otherwise, the result should be the same as before.
diff --git a/core/src/test/scala/org/apache/spark/network/netty/ServerClientIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/ServerClientIntegrationSuite.scala
new file mode 100644
index 0000000000000..02d0ffc86f58f
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/network/netty/ServerClientIntegrationSuite.scala
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty
+
+import java.io.{RandomAccessFile, File}
+import java.nio.ByteBuffer
+import java.util.{Collections, HashSet}
+import java.util.concurrent.{TimeUnit, Semaphore}
+
+import scala.collection.JavaConversions._
+
+import io.netty.buffer.{ByteBufUtil, Unpooled}
+
+import org.scalatest.{BeforeAndAfterAll, FunSuite}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.network.netty.client.{BlockClientListener, ReferenceCountedBuffer, BlockFetchingClientFactory}
+import org.apache.spark.network.netty.server.BlockServer
+import org.apache.spark.storage.{FileSegment, BlockDataProvider}
+
+
+/**
+ * Test suite that makes sure the server and the client implementations share the same protocol.
+ */
+class ServerClientIntegrationSuite extends FunSuite with BeforeAndAfterAll {
+
+  val bufSize = 100000
+  var buf: ByteBuffer = _
+  var testFile: File = _
+  var server: BlockServer = _
+  var clientFactory: BlockFetchingClientFactory = _
+
+  val bufferBlockId = "buffer_block"
+  val fileBlockId = "file_block"
+
+  val fileContent = new Array[Byte](1024)
+  scala.util.Random.nextBytes(fileContent)
+
+  override def beforeAll() = {
+    buf = ByteBuffer.allocate(bufSize)
+    for (i <- 1 to bufSize) {
+      buf.put(i.toByte)
+    }
+    buf.flip()
+
+    testFile = File.createTempFile("netty-test-file", "txt")
+    val fp = new RandomAccessFile(testFile, "rw")
+    fp.write(fileContent)
+    fp.close()
+
+    server = new BlockServer(new SparkConf, new BlockDataProvider {
+      override def getBlockData(blockId: String): Either[FileSegment, ByteBuffer] = {
+        if (blockId == bufferBlockId) {
+          Right(buf)
+        } else if (blockId == fileBlockId) {
+          Left(new FileSegment(testFile, 10, testFile.length - 25))
+        } else {
+          throw new Exception("Unknown block id " + blockId)
+        }
+      }
+    })
+
+    clientFactory = new BlockFetchingClientFactory(new SparkConf)
+  }
+
+  override def afterAll() = {
+    server.stop()
+    clientFactory.stop()
+  }
+
+  /** A ByteBuf for buffer_block */
+  lazy val byteBufferBlockReference = Unpooled.wrappedBuffer(buf)
+
+  /** A ByteBuf for file_block */
+  lazy val fileBlockReference = Unpooled.wrappedBuffer(fileContent, 10, fileContent.length - 25)
+
+  def fetchBlocks(blockIds: Seq[String]): (Set[String], Set[ReferenceCountedBuffer], Set[String]) =
+  {
+    val client = clientFactory.createClient(server.hostName, server.port)
+    val sem = new Semaphore(0)
+    val receivedBlockIds = Collections.synchronizedSet(new HashSet[String])
+    val errorBlockIds = Collections.synchronizedSet(new HashSet[String])
+    val receivedBuffers = Collections.synchronizedSet(new HashSet[ReferenceCountedBuffer])
+
+    client.fetchBlocks(
+      blockIds,
+      new BlockClientListener {
+        override def onFetchFailure(blockId: String, errorMsg: String): Unit = {
+          errorBlockIds.add(blockId)
+          sem.release()
+        }
+
+        override def onFetchSuccess(blockId: String, data: ReferenceCountedBuffer): Unit = {
+          receivedBlockIds.add(blockId)
+          data.retain()
+          receivedBuffers.add(data)
+          sem.release()
+        }
+      }
+    )
+    if (!sem.tryAcquire(blockIds.size, 30, TimeUnit.SECONDS)) {
+      fail("Timeout getting response from the server")
+    }
+    client.close()
+    (receivedBlockIds.toSet, receivedBuffers.toSet, errorBlockIds.toSet)
+  }
+
+  test("fetch a ByteBuffer block") {
+    val (blockIds, buffers, failBlockIds) = fetchBlocks(Seq(bufferBlockId))
+    assert(blockIds === Set(bufferBlockId))
+    assert(buffers.map(_.underlying) === Set(byteBufferBlockReference))
+    assert(failBlockIds.isEmpty)
+    buffers.foreach(_.release())
+  }
+
+  test("fetch a FileSegment block via zero-copy send") {
+    val (blockIds, buffers, failBlockIds) = fetchBlocks(Seq(fileBlockId))
+    assert(blockIds === Set(fileBlockId))
+    assert(buffers.map(_.underlying) === Set(fileBlockReference))
+    assert(failBlockIds.isEmpty)
+    buffers.foreach(_.release())
+  }
+
+  test("fetch a non-existent block") {
+    val (blockIds, buffers, failBlockIds) = fetchBlocks(Seq("random-block"))
+    assert(blockIds.isEmpty)
+    assert(buffers.isEmpty)
+    assert(failBlockIds === Set("random-block"))
+  }
+
+  test("fetch both ByteBuffer block and FileSegment block") {
+    val (blockIds, buffers, failBlockIds) = fetchBlocks(Seq(bufferBlockId, fileBlockId))
+    assert(blockIds === Set(bufferBlockId, fileBlockId))
+    assert(buffers.map(_.underlying) === Set(byteBufferBlockReference, fileBlockReference))
+    assert(failBlockIds.isEmpty)
+    buffers.foreach(_.release())
+  }
+
+  test("fetch both ByteBuffer block and a non-existent block") {
+    val (blockIds, buffers, failBlockIds) = fetchBlocks(Seq(bufferBlockId, "random-block"))
+    assert(blockIds === Set(bufferBlockId))
+    assert(buffers.map(_.underlying) === Set(byteBufferBlockReference))
+    assert(failBlockIds === Set("random-block"))
+    buffers.foreach(_.release())
+  }
+}
diff --git a/core/src/test/scala/org/apache/spark/network/netty/client/BlockFetchingClientHandlerSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/client/BlockFetchingClientHandlerSuite.scala
new file mode 100644
index 0000000000000..903ab09ae4322
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/network/netty/client/BlockFetchingClientHandlerSuite.scala
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.client
+
+import java.nio.ByteBuffer
+
+import io.netty.buffer.Unpooled
+import io.netty.channel.embedded.EmbeddedChannel
+
+import org.scalatest.{PrivateMethodTester, FunSuite}
+
+
+class BlockFetchingClientHandlerSuite extends FunSuite with PrivateMethodTester {
+
+  test("handling block data (successful fetch)") {
+    val blockId = "test_block"
+    val blockData = "blahblahblahblahblah"
+    val totalLength = 4 + blockId.length + blockData.length
+
+    var parsedBlockId: String = ""
+    var parsedBlockData: String = ""
+    val handler = new BlockFetchingClientHandler
+    handler.addRequest(blockId,
+      new BlockClientListener {
+        override def onFetchFailure(blockId: String, errorMsg: String): Unit = ???
+        override def onFetchSuccess(bid: String, refCntBuf: ReferenceCountedBuffer): Unit = {
+          parsedBlockId = bid
+          val bytes = new Array[Byte](refCntBuf.byteBuffer().remaining)
+          refCntBuf.byteBuffer().get(bytes)
+          parsedBlockData = new String(bytes)
+        }
+      }
+    )
+
+    val outstandingRequests = PrivateMethod[java.util.Map[_, _]]('outstandingRequests)
+    assert(handler.invokePrivate(outstandingRequests()).size === 1)
+
+    val channel = new EmbeddedChannel(handler)
+    val buf = ByteBuffer.allocate(totalLength + 4)  // 4 bytes for the length field itself
+    buf.putInt(totalLength)
+    buf.putInt(blockId.length)
+    buf.put(blockId.getBytes)
+    buf.put(blockData.getBytes)
+    buf.flip()
+
+    channel.writeInbound(Unpooled.wrappedBuffer(buf))
+    assert(parsedBlockId === blockId)
+    assert(parsedBlockData === blockData)
+
+    assert(handler.invokePrivate(outstandingRequests()).size === 0)
+
+    channel.close()
+  }
+
+  test("handling error message (failed fetch)") {
+    val blockId = "test_block"
+    val errorMsg = "error erro5r error err4or error3 error6 error erro1r"
+    val totalLength = 4 + blockId.length + errorMsg.length
+
+    var parsedBlockId: String = ""
+    var parsedErrorMsg: String = ""
+    val handler = new BlockFetchingClientHandler
+    handler.addRequest(blockId, new BlockClientListener {
+      override def onFetchFailure(bid: String, msg: String) ={
+        parsedBlockId = bid
+        parsedErrorMsg = msg
+      }
+      override def onFetchSuccess(bid: String, refCntBuf: ReferenceCountedBuffer) = ???
+    })
+
+    val outstandingRequests = PrivateMethod[java.util.Map[_, _]]('outstandingRequests)
+    assert(handler.invokePrivate(outstandingRequests()).size === 1)
+
+    val channel = new EmbeddedChannel(handler)
+    val buf = ByteBuffer.allocate(totalLength + 4)  // 4 bytes for the length field itself
+    buf.putInt(totalLength)
+    buf.putInt(-blockId.length)
+    buf.put(blockId.getBytes)
+    buf.put(errorMsg.getBytes)
+    buf.flip()
+
+    channel.writeInbound(Unpooled.wrappedBuffer(buf))
+    assert(parsedBlockId === blockId)
+    assert(parsedErrorMsg === errorMsg)
+
+    assert(handler.invokePrivate(outstandingRequests()).size === 0)
+
+    channel.close()
+  }
+}
diff --git a/core/src/test/scala/org/apache/spark/network/netty/server/BlockHeaderEncoderSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/server/BlockHeaderEncoderSuite.scala
new file mode 100644
index 0000000000000..3ee281cb1350b
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/network/netty/server/BlockHeaderEncoderSuite.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.server
+
+import io.netty.buffer.ByteBuf
+import io.netty.channel.embedded.EmbeddedChannel
+
+import org.scalatest.FunSuite
+
+
+class BlockHeaderEncoderSuite extends FunSuite {
+
+  test("encode normal block data") {
+    val blockId = "test_block"
+    val channel = new EmbeddedChannel(new BlockHeaderEncoder)
+    channel.writeOutbound(new BlockHeader(17, blockId, None))
+    val out = channel.readOutbound().asInstanceOf[ByteBuf]
+    assert(out.readInt() === 4 + blockId.length + 17)
+    assert(out.readInt() === blockId.length)
+
+    val blockIdBytes = new Array[Byte](blockId.length)
+    out.readBytes(blockIdBytes)
+    assert(new String(blockIdBytes) === blockId)
+    assert(out.readableBytes() === 0)
+
+    channel.close()
+  }
+
+  test("encode error message") {
+    val blockId = "error_block"
+    val errorMsg = "error encountered"
+    val channel = new EmbeddedChannel(new BlockHeaderEncoder)
+    channel.writeOutbound(new BlockHeader(17, blockId, Some(errorMsg)))
+    val out = channel.readOutbound().asInstanceOf[ByteBuf]
+    assert(out.readInt() === 4 + blockId.length + errorMsg.length)
+    assert(out.readInt() === -blockId.length)
+
+    val blockIdBytes = new Array[Byte](blockId.length)
+    out.readBytes(blockIdBytes)
+    assert(new String(blockIdBytes) === blockId)
+
+    val errorMsgBytes = new Array[Byte](errorMsg.length)
+    out.readBytes(errorMsgBytes)
+    assert(new String(errorMsgBytes) === errorMsg)
+    assert(out.readableBytes() === 0)
+
+    channel.close()
+  }
+}
diff --git a/core/src/test/scala/org/apache/spark/network/netty/server/BlockServerHandlerSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/server/BlockServerHandlerSuite.scala
new file mode 100644
index 0000000000000..3239c710f1639
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/network/netty/server/BlockServerHandlerSuite.scala
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty.server
+
+import java.io.{RandomAccessFile, File}
+import java.nio.ByteBuffer
+
+import io.netty.buffer.{Unpooled, ByteBuf}
+import io.netty.channel.{ChannelHandlerContext, SimpleChannelInboundHandler, DefaultFileRegion}
+import io.netty.channel.embedded.EmbeddedChannel
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.storage.{BlockDataProvider, FileSegment}
+
+
+class BlockServerHandlerSuite extends FunSuite {
+
+  test("ByteBuffer block") {
+    val expectedBlockId = "test_bytebuffer_block"
+    val buf = ByteBuffer.allocate(10000)
+    for (i <- 1 to 10000) {
+      buf.put(i.toByte)
+    }
+    buf.flip()
+
+    val channel = new EmbeddedChannel(new BlockServerHandler(new BlockDataProvider {
+      override def getBlockData(blockId: String): Either[FileSegment, ByteBuffer] = Right(buf)
+    }))
+
+    channel.writeInbound(expectedBlockId)
+    assert(channel.outboundMessages().size === 2)
+
+    val out1 = channel.readOutbound().asInstanceOf[BlockHeader]
+    val out2 = channel.readOutbound().asInstanceOf[ByteBuf]
+
+    assert(out1.blockId === expectedBlockId)
+    assert(out1.blockSize === buf.remaining)
+    assert(out1.error === None)
+
+    assert(out2.equals(Unpooled.wrappedBuffer(buf)))
+
+    channel.close()
+  }
+
+  test("FileSegment block via zero-copy") {
+    val expectedBlockId = "test_file_block"
+
+    // Create random file data
+    val fileContent = new Array[Byte](1024)
+    scala.util.Random.nextBytes(fileContent)
+    val testFile = File.createTempFile("netty-test-file", "txt")
+    val fp = new RandomAccessFile(testFile, "rw")
+    fp.write(fileContent)
+    fp.close()
+
+    val channel = new EmbeddedChannel(new BlockServerHandler(new BlockDataProvider {
+      override def getBlockData(blockId: String): Either[FileSegment, ByteBuffer] = {
+        Left(new FileSegment(testFile, 15, testFile.length - 25))
+      }
+    }))
+
+    channel.writeInbound(expectedBlockId)
+    assert(channel.outboundMessages().size === 2)
+
+    val out1 = channel.readOutbound().asInstanceOf[BlockHeader]
+    val out2 = channel.readOutbound().asInstanceOf[DefaultFileRegion]
+
+    assert(out1.blockId === expectedBlockId)
+    assert(out1.blockSize === testFile.length - 25)
+    assert(out1.error === None)
+
+    assert(out2.count === testFile.length - 25)
+    assert(out2.position === 15)
+  }
+
+  test("pipeline exception propagation") {
+    val blockServerHandler = new BlockServerHandler(new BlockDataProvider {
+      override def getBlockData(blockId: String): Either[FileSegment, ByteBuffer] = ???
+    })
+    val exceptionHandler = new SimpleChannelInboundHandler[String]() {
+      override def channelRead0(ctx: ChannelHandlerContext, msg: String): Unit = {
+        throw new Exception("this is an error")
+      }
+    }
+
+    val channel = new EmbeddedChannel(exceptionHandler, blockServerHandler)
+    assert(channel.isOpen)
+    channel.writeInbound("a message to trigger the error")
+    assert(!channel.isOpen)
+  }
+}
diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
index 20bac66105a69..f32ce6f9fcc7f 100644
--- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
@@ -825,8 +825,7 @@ class BlockManagerSuite extends FunSuite with Matchers with BeforeAndAfter
     val blockManager = mock(classOf[BlockManager])
     val shuffleBlockManager = mock(classOf[ShuffleBlockManager])
     when(shuffleBlockManager.conf).thenReturn(conf)
-    val diskBlockManager = new DiskBlockManager(shuffleBlockManager,
-      System.getProperty("java.io.tmpdir"))
+    val diskBlockManager = new DiskBlockManager(shuffleBlockManager, conf)
 
     when(blockManager.conf).thenReturn(conf.clone.set(confKey, 0.toString))
     val diskStoreMapped = new DiskStore(blockManager, diskBlockManager)
diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
index 777579bc570db..aabaeadd7a071 100644
--- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
@@ -71,7 +71,9 @@ class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach with Before
   }
 
   override def beforeEach() {
-    diskBlockManager = new DiskBlockManager(shuffleBlockManager, rootDirs)
+    val conf = testConf.clone
+    conf.set("spark.local.dir", rootDirs)
+    diskBlockManager = new DiskBlockManager(shuffleBlockManager, conf)
     shuffleBlockManager.idToSegmentMap.clear()
   }
 
diff --git a/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala b/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala
new file mode 100644
index 0000000000000..dae7bf0e336de
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.storage
+
+import java.io.File
+
+import org.apache.spark.util.Utils
+import org.scalatest.FunSuite
+
+import org.apache.spark.SparkConf
+
+
+/**
+ * Tests for the spark.local.dir and SPARK_LOCAL_DIRS configuration options.
+ */
+class LocalDirsSuite extends FunSuite {
+
+  test("Utils.getLocalDir() returns a valid directory, even if some local dirs are missing") {
+    // Regression test for SPARK-2974
+    assert(!new File("/NONEXISTENT_DIR").exists())
+    val conf = new SparkConf(false)
+      .set("spark.local.dir", s"/NONEXISTENT_PATH,${System.getProperty("java.io.tmpdir")}")
+    assert(new File(Utils.getLocalDir(conf)).exists())
+  }
+
+  test("SPARK_LOCAL_DIRS override also affects driver") {
+    // Regression test for SPARK-2975
+    assert(!new File("/NONEXISTENT_DIR").exists())
+    // SPARK_LOCAL_DIRS is a valid directory:
+    class MySparkConf extends SparkConf(false) {
+      override def getenv(name: String) = {
+        if (name == "SPARK_LOCAL_DIRS") System.getProperty("java.io.tmpdir")
+        else super.getenv(name)
+      }
+
+      override def clone: SparkConf = {
+        new MySparkConf().setAll(settings)
+      }
+    }
+    // spark.local.dir only contains invalid directories, but that's not a problem since
+    // SPARK_LOCAL_DIRS will override it on both the driver and workers:
+    val conf = new MySparkConf().set("spark.local.dir", "/NONEXISTENT_PATH")
+    assert(new File(Utils.getLocalDir(conf)).exists())
+  }
+
+}
diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh
index 28f26d2368254..eab6313733dfd 100755
--- a/dev/create-release/create-release.sh
+++ b/dev/create-release/create-release.sh
@@ -60,14 +60,14 @@ if [[ ! "$@" =~ --package-only ]]; then
     -Dmaven.javadoc.skip=true \
     -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
     -Dtag=$GIT_TAG -DautoVersionSubmodules=true \
-    -Pyarn -Phive -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
+    -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     --batch-mode release:prepare
 
   mvn -DskipTests \
     -Darguments="-DskipTests=true -Dmaven.javadoc.skip=true -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -Dgpg.passphrase=${GPG_PASSPHRASE}" \
     -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
     -Dmaven.javadoc.skip=true \
-    -Pyarn -Phive -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
+    -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     release:perform
 
   cd ..
@@ -117,12 +117,12 @@ make_binary_release() {
     spark-$RELEASE_VERSION-bin-$NAME.tgz.sha
 }
 
-make_binary_release "hadoop1" "-Phive -Phive-thriftserver -Dhadoop.version=1.0.4" &
-make_binary_release "cdh4" "-Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" &
-make_binary_release "hadoop2" \
-  "-Phive -Phive-thriftserver -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 -Pyarn.version=2.2.0" &
-make_binary_release "hadoop2-without-hive" \
-  "-Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 -Pyarn.version=2.2.0" &
+make_binary_release "hadoop1" "-Phive -Dhadoop.version=1.0.4" &
+make_binary_release "hadoop2.3" "-Phadoop-2.3 -Phive -Pyarn" &
+make_binary_release "hadoop2.4" "-Phadoop-2.4 -Phive -Pyarn" &
+make_binary_release "hadoop2.4-without-hive" "-Phadoop-2.4 -Pyarn" &
+make_binary_release "mapr3" "-Pmapr3 -Pyarn -Phive" &
+make_binary_release "mapr4" "-Pmapr4 -Pyarn -Phive" &
 wait
 
 # Copy data
diff --git a/dev/run-tests b/dev/run-tests
index 0e24515d1376c..d751961605dfd 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -55,10 +55,10 @@ JAVA_VERSION=$($java_cmd -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*
 # Partial solution for SPARK-1455. Only run Hive tests if there are sql changes.
 if [ -n "$AMPLAB_JENKINS" ]; then
   git fetch origin master:master
-  diffs=`git diff --name-only master | grep "^sql/"`
+  diffs=`git diff --name-only master | grep "^\(sql/\)\|\(bin/spark-sql\)\|\(sbin/start-thriftserver.sh\)"`
   if [ -n "$diffs" ]; then
     echo "Detected changes in SQL. Will run Hive test suite."
-    export _RUN_SQL_TESTS=true # exported for PySpark tests
+    _RUN_SQL_TESTS=true
   fi
 fi
 
@@ -89,13 +89,22 @@ echo "========================================================================="
 echo "Running Spark unit tests"
 echo "========================================================================="
 
+# Build Spark; we always build with Hive because the PySpark SparkSQL tests need it.
+# echo "q" is needed because sbt on encountering a build file with failure
+# (either resolution or compilation) prompts the user for input either q, r,
+# etc to quit or retry. This echo is there to make it not block.
+BUILD_MVN_PROFILE_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver "
+echo -e "q\n" | sbt/sbt $BUILD_MVN_PROFILE_ARGS clean package assembly/assembly | \
+  grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
+
+# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled:
 if [ -n "$_RUN_SQL_TESTS" ]; then
-  SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver"
+  SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
 fi
-# echo "q" is needed because sbt on encountering a build file with failure 
-# (either resolution or compilation) prompts the user for input either q, r, 
+# echo "q" is needed because sbt on encountering a build file with failure
+# (either resolution or compilation) prompts the user for input either q, r,
 # etc to quit or retry. This echo is there to make it not block.
-echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS clean package assembly/assembly test | \
+echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS test | \
   grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 
 echo ""
diff --git a/dev/scalastyle b/dev/scalastyle
index b53053a04ff42..eb9b467965636 100755
--- a/dev/scalastyle
+++ b/dev/scalastyle
@@ -17,7 +17,7 @@
 # limitations under the License.
 #
 
-echo -e "q\n" | sbt/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt
+echo -e "q\n" | sbt/sbt -Phive scalastyle > scalastyle.txt
 # Check style with YARN alpha built too
 echo -e "q\n" | sbt/sbt -Pyarn -Phadoop-0.23 -Dhadoop.version=0.23.9 yarn-alpha/scalastyle \
   >> scalastyle.txt
diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md
index 4d87ab92cec5b..a7d7bd3ccb1f2 100644
--- a/docs/building-with-maven.md
+++ b/docs/building-with-maven.md
@@ -98,12 +98,8 @@ mvn -Pyarn-alpha -Phadoop-2.3 -Dhadoop.version=2.3.0 -Dyarn.version=0.23.7 -Dski
 
 # Building Thrift JDBC server and CLI for Spark SQL
 
-Spark SQL supports Thrift JDBC server and CLI.
-See sql-programming-guide.md for more information about those features.
-You can use those features by setting `-Phive-thriftserver` when building Spark as follows.
-{% highlight bash %}
-mvn -Phive-thriftserver assembly
-{% endhighlight %}
+Spark SQL supports Thrift JDBC server and CLI. See sql-programming-guide.md for 
+more information about the JDBC server.
 
 # Spark Tests in Maven
 
diff --git a/docs/ec2-scripts.md b/docs/ec2-scripts.md
index 156a727026790..f5ac6d894e1eb 100644
--- a/docs/ec2-scripts.md
+++ b/docs/ec2-scripts.md
@@ -12,14 +12,16 @@ on the [Amazon Web Services site](http://aws.amazon.com/).
 
 `spark-ec2` is designed to manage multiple named clusters. You can
 launch a new cluster (telling the script its size and giving it a name),
-shutdown an existing cluster, or log into a cluster. Each cluster is
-identified by placing its machines into EC2 security groups whose names
-are derived from the name of the cluster. For example, a cluster named
+shutdown an existing cluster, or log into a cluster. Each cluster
+launches a set of instances, which are tagged with the cluster name,
+and placed into EC2 security groups.  If you don't specify a security
+group, the `spark-ec2` script will create security groups based on the
+cluster name you request. For example, a cluster named
 `test` will contain a master node in a security group called
 `test-master`, and a number of slave nodes in a security group called
-`test-slaves`. The `spark-ec2` script will create these security groups
-for you based on the cluster name you request. You can also use them to
-identify machines belonging to each cluster in the Amazon EC2 Console.
+`test-slaves`.  You can also specify a security group prefix to be used
+in place of the cluster name.  Machines in a cluster can be identified
+by looking for the "Name" tag of the instance in the Amazon EC2 Console.
 
 
 # Before You Start
diff --git a/docs/mllib-decision-tree.md b/docs/mllib-decision-tree.md
index 9cbd880897578..c01a92a9a1b26 100644
--- a/docs/mllib-decision-tree.md
+++ b/docs/mllib-decision-tree.md
@@ -84,8 +84,8 @@ Section 9.2.4 in
 [Elements of Statistical Machine Learning](http://statweb.stanford.edu/~tibs/ElemStatLearn/) for
 details). For example, for a binary classification problem with one categorical feature with three
 categories A, B and C with corresponding proportion of label 1 as 0.2, 0.6 and 0.4, the categorical
-features are ordered as A followed by C followed B or A, B, C. The two split candidates are A \| C, B
-and A , B \| C where \| denotes the split. A similar heuristic is used for multiclass classification
+features are ordered as A followed by C followed B or A, C, B. The two split candidates are A \| C, B
+and A , C \| B where \| denotes the split. A similar heuristic is used for multiclass classification
 when `$2^(M-1)-1$` is greater than the number of bins -- the impurity for each categorical feature value
 is used for ordering.
 
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 23d5a0c4607af..ca0a84a8c53fd 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -9,7 +9,7 @@ filtering, dimensionality reduction, as well as underlying optimization primitiv
 
 * [Data types](mllib-basics.html)
 * [Basic statistics](mllib-stats.html)
-  * data generators  
+  * random data generation  
   * stratified sampling
   * summary statistics
   * hypothesis testing
diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md
index e504cd7f0f578..9137f9dc1b692 100644
--- a/docs/mllib-linear-methods.md
+++ b/docs/mllib-linear-methods.md
@@ -518,6 +518,81 @@ print("Mean Squared Error = " + str(MSE))
 </div>
 </div>
 
+## Streaming linear regression
+
+When data arrive in a streaming fashion, it is useful to fit regression models online, 
+updating the parameters of the model as new data arrives. MLlib currently supports 
+streaming linear regression using ordinary least squares. The fitting is similar
+to that performed offline, except fitting occurs on each batch of data, so that
+the model continually updates to reflect the data from the stream.
+
+### Examples
+
+The following example demonstrates how to load training and testing data from two different
+input streams of text files, parse the streams as labeled points, fit a linear regression model
+online to the first stream, and make predictions on the second stream.
+
+<div class="codetabs">
+
+<div data-lang="scala" markdown="1">
+
+First, we import the necessary classes for parsing our input data and creating the model. 
+
+{% highlight scala %}
+
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD
+
+{% endhighlight %}
+
+Then we make input streams for training and testing data. We assume a StreamingContext `ssc`
+has already been created, see [Spark Streaming Programming Guide](streaming-programming-guide.html#initializing)
+for more info. For this example, we use labeled points in training and testing streams, 
+but in practice you will likely want to use unlabeled vectors for test data.
+
+{% highlight scala %}
+
+val trainingData = ssc.textFileStream('/training/data/dir').map(LabeledPoint.parse)
+val testData = ssc.textFileStream('/testing/data/dir').map(LabeledPoint.parse)
+
+{% endhighlight %}
+
+We create our model by initializing the weights to 0
+
+{% highlight scala %}
+
+val numFeatures = 3
+val model = new StreamingLinearRegressionWithSGD()
+    .setInitialWeights(Vectors.zeros(numFeatures))
+
+{% endhighlight %}
+
+Now we register the streams for training and testing and start the job. 
+Printing predictions alongside true labels lets us easily see the result.
+
+{% highlight scala %}
+
+model.trainOn(trainingData)
+model.predictOnValues(testData.map(lp => (lp.label, lp.features))).print()
+
+ssc.start()
+ssc.awaitTermination()
+ 
+{% endhighlight %}
+
+We can now save text files with data to the training or testing folders.
+Each line should be a data point formatted as `(y,[x1,x2,x3])` where `y` is the label 
+and `x1,x2,x3` are the features. Anytime a text file is placed in `/training/data/dir` 
+the model will update. Anytime a text file is placed in `/testing/data/dir` you will see predictions. 
+As you feed more data to the training directory, the predictions 
+will get better!
+
+</div>
+
+</div>
+
+
 ## Implementation (developer)
 
 Behind the scene, MLlib implements a simple distributed version of stochastic gradient descent
diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md
index 86d94aebd9442..7f9d4c6563944 100644
--- a/docs/mllib-naive-bayes.md
+++ b/docs/mllib-naive-bayes.md
@@ -17,7 +17,8 @@ Bayes](http://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_Bay
 which is typically used for [document
 classification](http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html).
 Within that context, each observation is a document and each
-feature represents a term whose value is the frequency of the term. 
+feature represents a term whose value is the frequency of the term.
+Feature values must be nonnegative to represent term frequencies.
 [Additive smoothing](http://en.wikipedia.org/wiki/Lidstone_smoothing) can be used by
 setting the parameter $\lambda$ (default to $1.0$). For document classification, the input feature
 vectors are usually sparse, and sparse vectors should be supplied as input to take advantage of
diff --git a/docs/mllib-stats.md b/docs/mllib-stats.md
index ca9ef46c15186..f25dca746ba3a 100644
--- a/docs/mllib-stats.md
+++ b/docs/mllib-stats.md
@@ -25,7 +25,79 @@ displayTitle: <a href="mllib-guide.html">MLlib</a> - Statistics Functionality
 \newcommand{\zero}{\mathbf{0}}
 \]`
 
-## Data Generators 
+## Random data generation
+
+Random data generation is useful for randomized algorithms, prototyping, and performance testing.
+MLlib supports generating random RDDs with i.i.d. values drawn from a given distribution:
+uniform, standard normal, or Poisson.
+
+<div class="codetabs">
+<div data-lang="scala" markdown="1">
+[`RandomRDDs`](api/scala/index.html#org.apache.spark.mllib.random.RandomRDDs) provides factory
+methods to generate random double RDDs or vector RDDs.
+The following example generates a random double RDD, whose values follows the standard normal
+distribution `N(0, 1)`, and then map it to `N(1, 4)`.
+
+{% highlight scala %}
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.random.RandomRDDs._
+
+val sc: SparkContext = ...
+
+// Generate a random double RDD that contains 1 million i.i.d. values drawn from the
+// standard normal distribution `N(0, 1)`, evenly distributed in 10 partitions.
+val u = normalRDD(sc, 1000000L, 10)
+// Apply a transform to get a random double RDD following `N(1, 4)`.
+val v = u.map(x => 1.0 + 2.0 * x)
+{% endhighlight %}
+</div>
+
+<div data-lang="java" markdown="1">
+[`RandomRDDs`](api/java/index.html#org.apache.spark.mllib.random.RandomRDDs) provides factory
+methods to generate random double RDDs or vector RDDs.
+The following example generates a random double RDD, whose values follows the standard normal
+distribution `N(0, 1)`, and then map it to `N(1, 4)`.
+
+{% highlight java %}
+import org.apache.spark.SparkContext;
+import org.apache.spark.api.JavaDoubleRDD;
+import static org.apache.spark.mllib.random.RandomRDDs.*;
+
+JavaSparkContext jsc = ...
+
+// Generate a random double RDD that contains 1 million i.i.d. values drawn from the
+// standard normal distribution `N(0, 1)`, evenly distributed in 10 partitions.
+JavaDoubleRDD u = normalJavaRDD(jsc, 1000000L, 10);
+// Apply a transform to get a random double RDD following `N(1, 4)`.
+JavaDoubleRDD v = u.map(
+  new Function<Double, Double>() {
+    public Double call(Double x) {
+      return 1.0 + 2.0 * x;
+    }
+  });
+{% endhighlight %}
+</div>
+
+<div data-lang="python" markdown="1">
+[`RandomRDDs`](api/python/pyspark.mllib.random.RandomRDDs-class.html) provides factory
+methods to generate random double RDDs or vector RDDs.
+The following example generates a random double RDD, whose values follows the standard normal
+distribution `N(0, 1)`, and then map it to `N(1, 4)`.
+
+{% highlight python %}
+from pyspark.mllib.random import RandomRDDs
+
+sc = ... # SparkContext
+
+# Generate a random double RDD that contains 1 million i.i.d. values drawn from the
+# standard normal distribution `N(0, 1)`, evenly distributed in 10 partitions.
+u = RandomRDDs.uniformRDD(sc, 1000000L, 10)
+# Apply a transform to get a random double RDD following `N(1, 4)`.
+v = u.map(lambda x: 1.0 + 2.0 * x)
+{% endhighlight %}
+</div>
+
+</div>
 
 ## Stratified Sampling 
 
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 34accade36ea9..c41f2804a6021 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -578,9 +578,7 @@ evaluated by the SQL execution engine.  A full list of the functions supported c
 
 The Thrift JDBC server implemented here corresponds to the [`HiveServer2`]
 (https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2) in Hive 0.12. You can test
-the JDBC server with the beeline script comes with either Spark or Hive 0.12.  In order to use Hive
-you must first run '`sbt/sbt -Phive-thriftserver assembly/assembly`' (or use `-Phive-thriftserver`
-for maven).
+the JDBC server with the beeline script comes with either Spark or Hive 0.12.
 
 To start the JDBC server, run the following in the Spark directory:
 
diff --git a/docs/streaming-custom-receivers.md b/docs/streaming-custom-receivers.md
index 1e045a3dd0ca9..27cd085782f66 100644
--- a/docs/streaming-custom-receivers.md
+++ b/docs/streaming-custom-receivers.md
@@ -186,7 +186,7 @@ JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>()
 ...
 {% endhighlight %}
 
-The full source code is in the example [JavaCustomReceiver.java](https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/streaming/examples/JavaCustomReceiver.java).
+The full source code is in the example [JavaCustomReceiver.java](https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java).
 
 </div>
 </div>
@@ -215,7 +215,7 @@ And a new input stream can be created with this custom actor as
 val lines = ssc.actorStream[String](Props(new CustomActor()), "CustomReceiver")
 {% endhighlight %}
 
-See [ActorWordCount.scala](https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala)
+See [ActorWordCount.scala](https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/ActorWordCount.scala)
 for an end-to-end example.
 
 
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 0c2f85a3868f4..3a8c816cfffa1 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -124,7 +124,7 @@ def parse_args():
         help="The SSH user you want to connect as (default: root)")
     parser.add_option(
         "--delete-groups", action="store_true", default=False,
-        help="When destroying a cluster, delete the security groups that were created")
+        help="When destroying a cluster, delete the security groups that were created.")
     parser.add_option(
         "--use-existing-master", action="store_true", default=False,
         help="Launch fresh slaves, but use an existing stopped master if possible")
@@ -138,7 +138,9 @@ def parse_args():
     parser.add_option(
         "--user-data", type="string", default="",
         help="Path to a user-data file (most AMI's interpret this as an initialization script)")
-
+    parser.add_option(
+        "--security-group-prefix", type="string", default=None,
+        help="Use this prefix for the security group rather than the cluster name.")
 
     (opts, args) = parser.parse_args()
     if len(args) != 2:
@@ -285,8 +287,12 @@ def launch_cluster(conn, opts, cluster_name):
             user_data_content = user_data_file.read()
 
     print "Setting up security groups..."
-    master_group = get_or_make_group(conn, cluster_name + "-master")
-    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
+    if opts.security_group_prefix is None:
+        master_group = get_or_make_group(conn, cluster_name + "-master")
+        slave_group = get_or_make_group(conn, cluster_name + "-slaves")
+    else:
+        master_group = get_or_make_group(conn, opts.security_group_prefix + "-master")
+        slave_group = get_or_make_group(conn, opts.security_group_prefix + "-slaves")
     if master_group.rules == []:  # Group was just now created
         master_group.authorize(src_group=master_group)
         master_group.authorize(src_group=slave_group)
@@ -310,12 +316,11 @@ def launch_cluster(conn, opts, cluster_name):
         slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
         slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
 
-    # Check if instances are already running in our groups
+    # Check if instances are already running with the cluster name
     existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                              die_on_error=False)
     if existing_slaves or (existing_masters and not opts.use_existing_master):
-        print >> stderr, ("ERROR: There are already instances running in " +
-                          "group %s or %s" % (master_group.name, slave_group.name))
+        print >> stderr, ("ERROR: There are already instances for name: %s " % cluster_name)
         sys.exit(1)
 
     # Figure out Spark AMI
@@ -371,9 +376,13 @@ def launch_cluster(conn, opts, cluster_name):
                 for r in reqs:
                     id_to_req[r.id] = r
                 active_instance_ids = []
+                outstanding_request_ids = []
                 for i in my_req_ids:
-                    if i in id_to_req and id_to_req[i].state == "active":
-                        active_instance_ids.append(id_to_req[i].instance_id)
+                    if i in id_to_req:
+                        if id_to_req[i].state == "active":
+                            active_instance_ids.append(id_to_req[i].instance_id)
+                        else:
+                            outstanding_request_ids.append(i)
                 if len(active_instance_ids) == opts.slaves:
                     print "All %d slaves granted" % opts.slaves
                     reservations = conn.get_all_instances(active_instance_ids)
@@ -382,8 +391,8 @@ def launch_cluster(conn, opts, cluster_name):
                         slave_nodes += r.instances
                     break
                 else:
-                    print "%d of %d slaves granted, waiting longer" % (
-                        len(active_instance_ids), opts.slaves)
+                    print "%d of %d slaves granted, waiting longer for request ids including %s" % (
+                        len(active_instance_ids), opts.slaves, outstanding_request_ids[0:10])
         except:
             print "Canceling spot instance requests"
             conn.cancel_spot_instance_requests(my_req_ids)
@@ -440,14 +449,29 @@ def launch_cluster(conn, opts, cluster_name):
         print "Launched master in %s, regid = %s" % (zone, master_res.id)
 
     # Give the instances descriptive names
+    # TODO: Add retry logic for tagging with name since it's used to identify a cluster.
     for master in master_nodes:
-        master.add_tag(
-            key='Name',
-            value='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id))
+        name = '{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)
+        for i in range(0, 5):
+            try:
+                master.add_tag(key='Name', value=name)
+            except:
+                print "Failed attempt %i of 5 to tag %s" % ((i + 1), name)
+                if (i == 5):
+                    raise "Error - failed max attempts to add name tag"
+                time.sleep(5)
+
+
     for slave in slave_nodes:
-        slave.add_tag(
-            key='Name',
-            value='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id))
+        name = '{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)
+        for i in range(0, 5):
+            try:
+                slave.add_tag(key='Name', value=name)
+            except:
+                print "Failed attempt %i of 5 to tag %s" % ((i + 1), name)
+                if (i == 5):
+                    raise "Error - failed max attempts to add name tag"
+                time.sleep(5)
 
     # Return all the instances
     return (master_nodes, slave_nodes)
@@ -463,10 +487,10 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
     for res in reservations:
         active = [i for i in res.instances if is_active(i)]
         for inst in active:
-            group_names = [g.name for g in inst.groups]
-            if group_names == [cluster_name + "-master"]:
+            name = inst.tags.get(u'Name', "")
+            if name.startswith(cluster_name + "-master"):
                 master_nodes.append(inst)
-            elif group_names == [cluster_name + "-slaves"]:
+            elif name.startswith(cluster_name + "-slave"):
                 slave_nodes.append(inst)
     if any((master_nodes, slave_nodes)):
         print ("Found %d master(s), %d slaves" % (len(master_nodes), len(slave_nodes)))
@@ -474,7 +498,7 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
         return (master_nodes, slave_nodes)
     else:
         if master_nodes == [] and slave_nodes != []:
-            print >> sys.stderr, "ERROR: Could not find master in group " + cluster_name + "-master"
+            print >> sys.stderr, "ERROR: Could not find master in with name " + cluster_name + "-master"
         else:
             print >> sys.stderr, "ERROR: Could not find any existing cluster"
         sys.exit(1)
@@ -816,7 +840,10 @@ def real_main():
             # Delete security groups as well
             if opts.delete_groups:
                 print "Deleting security groups (this will take some time)..."
-                group_names = [cluster_name + "-master", cluster_name + "-slaves"]
+                if opts.security_group_prefix is None:
+                    group_names = [cluster_name + "-master", cluster_name + "-slaves"]
+                else:
+                    group_names = [opts.security_group_prefix + "-master", opts.security_group_prefix + "-slaves"]
 
                 attempt = 1
                 while attempt <= 3:
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index d0bf1cf1ea796..0c68defa5e101 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -72,6 +72,13 @@
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope> <!-- Need it only for tests, don't package it -->
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
diff --git a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSink.scala b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSink.scala
index 948af5947f5e1..98ae7d783aec8 100644
--- a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSink.scala
+++ b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSink.scala
@@ -53,7 +53,6 @@ import org.apache.flume.sink.AbstractSink
  *
  */
 
-private[flume]
 class SparkSink extends AbstractSink with Logging with Configurable {
 
   // Size of the pool to use for holding transaction processors.
diff --git a/external/flume-sink/src/test/scala/org/apache/spark/streaming/flume/sink/SparkSinkSuite.scala b/external/flume-sink/src/test/scala/org/apache/spark/streaming/flume/sink/SparkSinkSuite.scala
new file mode 100644
index 0000000000000..44b27edf85ce8
--- /dev/null
+++ b/external/flume-sink/src/test/scala/org/apache/spark/streaming/flume/sink/SparkSinkSuite.scala
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.streaming.flume.sink
+
+import java.net.InetSocketAddress
+import java.util.concurrent.atomic.AtomicInteger
+import java.util.concurrent.{TimeUnit, CountDownLatch, Executors}
+
+import scala.collection.JavaConversions._
+import scala.concurrent.{ExecutionContext, Future}
+import scala.util.{Failure, Success}
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder
+import org.apache.avro.ipc.NettyTransceiver
+import org.apache.avro.ipc.specific.SpecificRequestor
+import org.apache.flume.Context
+import org.apache.flume.channel.MemoryChannel
+import org.apache.flume.event.EventBuilder
+import org.apache.spark.streaming.TestSuiteBase
+import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory
+
+class SparkSinkSuite extends TestSuiteBase {
+  val eventsPerBatch = 1000
+  val channelCapacity = 5000
+
+  test("Success") {
+    val (channel, sink) = initializeChannelAndSink()
+    channel.start()
+    sink.start()
+
+    putEvents(channel, eventsPerBatch)
+
+    val port = sink.getPort
+    val address = new InetSocketAddress("0.0.0.0", port)
+
+    val (transceiver, client) = getTransceiverAndClient(address, 1)(0)
+    val events = client.getEventBatch(1000)
+    client.ack(events.getSequenceNumber)
+    assert(events.getEvents.size() === 1000)
+    assertChannelIsEmpty(channel)
+    sink.stop()
+    channel.stop()
+    transceiver.close()
+  }
+
+  test("Nack") {
+    val (channel, sink) = initializeChannelAndSink()
+    channel.start()
+    sink.start()
+    putEvents(channel, eventsPerBatch)
+
+    val port = sink.getPort
+    val address = new InetSocketAddress("0.0.0.0", port)
+
+    val (transceiver, client) = getTransceiverAndClient(address, 1)(0)
+    val events = client.getEventBatch(1000)
+    assert(events.getEvents.size() === 1000)
+    client.nack(events.getSequenceNumber)
+    assert(availableChannelSlots(channel) === 4000)
+    sink.stop()
+    channel.stop()
+    transceiver.close()
+  }
+
+  test("Timeout") {
+    val (channel, sink) = initializeChannelAndSink(Map(SparkSinkConfig
+      .CONF_TRANSACTION_TIMEOUT -> 1.toString))
+    channel.start()
+    sink.start()
+    putEvents(channel, eventsPerBatch)
+    val port = sink.getPort
+    val address = new InetSocketAddress("0.0.0.0", port)
+
+    val (transceiver, client) = getTransceiverAndClient(address, 1)(0)
+    val events = client.getEventBatch(1000)
+    assert(events.getEvents.size() === 1000)
+    Thread.sleep(1000)
+    assert(availableChannelSlots(channel) === 4000)
+    sink.stop()
+    channel.stop()
+    transceiver.close()
+  }
+
+  test("Multiple consumers") {
+    testMultipleConsumers(failSome = false)
+  }
+
+  test("Multiple consumers with some failures") {
+    testMultipleConsumers(failSome = true)
+  }
+
+  def testMultipleConsumers(failSome: Boolean): Unit = {
+    implicit val executorContext = ExecutionContext
+      .fromExecutorService(Executors.newFixedThreadPool(5))
+    val (channel, sink) = initializeChannelAndSink()
+    channel.start()
+    sink.start()
+    (1 to 5).foreach(_ => putEvents(channel, eventsPerBatch))
+    val port = sink.getPort
+    val address = new InetSocketAddress("0.0.0.0", port)
+    val transceiversAndClients = getTransceiverAndClient(address, 5)
+    val batchCounter = new CountDownLatch(5)
+    val counter = new AtomicInteger(0)
+    transceiversAndClients.foreach(x => {
+      Future {
+        val client = x._2
+        val events = client.getEventBatch(1000)
+        if (!failSome || counter.getAndIncrement() % 2 == 0) {
+          client.ack(events.getSequenceNumber)
+        } else {
+          client.nack(events.getSequenceNumber)
+          throw new RuntimeException("Sending NACK for failure!")
+        }
+        events
+      }.onComplete {
+        case Success(events) =>
+          assert(events.getEvents.size() === 1000)
+          batchCounter.countDown()
+        case Failure(t) =>
+          // Don't re-throw the exception, causes a nasty unnecessary stack trace on stdout
+          batchCounter.countDown()
+      }
+    })
+    batchCounter.await()
+    TimeUnit.SECONDS.sleep(1) // Allow the sink to commit the transactions.
+    executorContext.shutdown()
+    if(failSome) {
+      assert(availableChannelSlots(channel) === 3000)
+    } else {
+      assertChannelIsEmpty(channel)
+    }
+    sink.stop()
+    channel.stop()
+    transceiversAndClients.foreach(x => x._1.close())
+  }
+
+  private def initializeChannelAndSink(overrides: Map[String, String] = Map.empty): (MemoryChannel,
+    SparkSink) = {
+    val channel = new MemoryChannel()
+    val channelContext = new Context()
+
+    channelContext.put("capacity", channelCapacity.toString)
+    channelContext.put("transactionCapacity", 1000.toString)
+    channelContext.put("keep-alive", 0.toString)
+    channelContext.putAll(overrides)
+    channel.configure(channelContext)
+
+    val sink = new SparkSink()
+    val sinkContext = new Context()
+    sinkContext.put(SparkSinkConfig.CONF_HOSTNAME, "0.0.0.0")
+    sinkContext.put(SparkSinkConfig.CONF_PORT, 0.toString)
+    sink.configure(sinkContext)
+    sink.setChannel(channel)
+    (channel, sink)
+  }
+
+  private def putEvents(ch: MemoryChannel, count: Int): Unit = {
+    val tx = ch.getTransaction
+    tx.begin()
+    (1 to count).foreach(x => ch.put(EventBuilder.withBody(x.toString.getBytes)))
+    tx.commit()
+    tx.close()
+  }
+
+  private def getTransceiverAndClient(address: InetSocketAddress,
+    count: Int): Seq[(NettyTransceiver, SparkFlumeProtocol.Callback)] = {
+
+    (1 to count).map(_ => {
+      lazy val channelFactoryExecutor =
+        Executors.newCachedThreadPool(new ThreadFactoryBuilder().setDaemon(true).
+          setNameFormat("Flume Receiver Channel Thread - %d").build())
+      lazy val channelFactory =
+        new NioClientSocketChannelFactory(channelFactoryExecutor, channelFactoryExecutor)
+      val transceiver = new NettyTransceiver(address, channelFactory)
+      val client = SpecificRequestor.getClient(classOf[SparkFlumeProtocol.Callback], transceiver)
+      (transceiver, client)
+    })
+  }
+
+  private def assertChannelIsEmpty(channel: MemoryChannel): Unit = {
+    assert(availableChannelSlots(channel) === channelCapacity)
+  }
+
+  private def availableChannelSlots(channel: MemoryChannel): Int = {
+    val queueRemaining = channel.getClass.getDeclaredField("queueRemaining")
+    queueRemaining.setAccessible(true)
+    val m = queueRemaining.get(channel).getClass.getDeclaredMethod("availablePermits")
+    m.invoke(queueRemaining.get(channel)).asInstanceOf[Int]
+  }
+}
diff --git a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala
index 8a85b0f987e42..32a19787a28e1 100644
--- a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala
+++ b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala
@@ -198,7 +198,7 @@ class FlumePollingStreamSuite extends TestSuiteBase {
   }
 
   def assertChannelIsEmpty(channel: MemoryChannel) = {
-    val queueRemaining = channel.getClass.getDeclaredField("queueRemaining");
+    val queueRemaining = channel.getClass.getDeclaredField("queueRemaining")
     queueRemaining.setAccessible(true)
     val m = queueRemaining.get(channel).getClass.getDeclaredMethod("availablePermits")
     assert(m.invoke(queueRemaining.get(channel)).asInstanceOf[Int] === 5000)
diff --git a/mllib/pom.xml b/mllib/pom.xml
index fc1ecfbea708f..c7a1e2ae75c84 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -91,6 +91,13 @@
       <artifactId>junit-interface</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
   <profiles>
     <profile>
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
index 6c7be0a4f1dcb..8c8e4a161aa5b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -19,9 +19,9 @@ package org.apache.spark.mllib.classification
 
 import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum}
 
-import org.apache.spark.Logging
+import org.apache.spark.{SparkException, Logging}
 import org.apache.spark.SparkContext._
-import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.rdd.RDD
 
@@ -73,7 +73,7 @@ class NaiveBayesModel private[mllib] (
  * This is the Multinomial NB ([[http://tinyurl.com/lsdw6p]]) which can handle all kinds of
  * discrete data.  For example, by converting documents into TF-IDF vectors, it can be used for
  * document classification.  By making every vector a 0-1 vector, it can also be used as
- * Bernoulli NB ([[http://tinyurl.com/p7c96j6]]).
+ * Bernoulli NB ([[http://tinyurl.com/p7c96j6]]). The input feature values must be nonnegative.
  */
 class NaiveBayes private (private var lambda: Double) extends Serializable with Logging {
 
@@ -91,12 +91,30 @@ class NaiveBayes private (private var lambda: Double) extends Serializable with
    * @param data RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
    */
   def run(data: RDD[LabeledPoint]) = {
+    val requireNonnegativeValues: Vector => Unit = (v: Vector) => {
+      val values = v match {
+        case sv: SparseVector =>
+          sv.values
+        case dv: DenseVector =>
+          dv.values
+      }
+      if (!values.forall(_ >= 0.0)) {
+        throw new SparkException(s"Naive Bayes requires nonnegative feature values but found $v.")
+      }
+    }
+
     // Aggregates term frequencies per label.
     // TODO: Calling combineByKey and collect creates two stages, we can implement something
     // TODO: similar to reduceByKeyLocally to save one stage.
     val aggregated = data.map(p => (p.label, p.features)).combineByKey[(Long, BDV[Double])](
-      createCombiner = (v: Vector) => (1L, v.toBreeze.toDenseVector),
-      mergeValue = (c: (Long, BDV[Double]), v: Vector) => (c._1 + 1L, c._2 += v.toBreeze),
+      createCombiner = (v: Vector) => {
+        requireNonnegativeValues(v)
+        (1L, v.toBreeze.toDenseVector)
+      },
+      mergeValue = (c: (Long, BDV[Double]), v: Vector) => {
+        requireNonnegativeValues(v)
+        (c._1 + 1L, c._2 += v.toBreeze)
+      },
       mergeCombiners = (c1: (Long, BDV[Double]), c2: (Long, BDV[Double])) =>
         (c1._1 + c2._1, c1._2 += c2._2)
     ).collect()
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 1dcaa2cd2e630..fc1444705364a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -30,11 +30,9 @@ import org.apache.spark.SparkContext._
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
-import org.apache.spark.mllib.rdd.RDDFunctions._
 import org.apache.spark.rdd._
 import org.apache.spark.util.Utils
 import org.apache.spark.util.random.XORShiftRandom
-import org.apache.spark.util.collection.PrimitiveKeyOpenHashMap
 
 /**
  *  Entry in vocabulary 
@@ -285,9 +283,9 @@ class Word2Vec extends Serializable with Logging {
     
     val newSentences = sentences.repartition(numPartitions).cache()
     val initRandom = new XORShiftRandom(seed)
-    var syn0Global =
+    val syn0Global =
       Array.fill[Float](vocabSize * vectorSize)((initRandom.nextFloat() - 0.5f) / vectorSize)
-    var syn1Global = new Array[Float](vocabSize * vectorSize)
+    val syn1Global = new Array[Float](vocabSize * vectorSize)
     var alpha = startingAlpha
     for (k <- 1 to numIterations) {
       val partial = newSentences.mapPartitionsWithIndex { case (idx, iter) =>
@@ -349,21 +347,22 @@ class Word2Vec extends Serializable with Logging {
         }
         val syn0Local = model._1
         val syn1Local = model._2
-        val synOut = new PrimitiveKeyOpenHashMap[Int, Array[Float]](vocabSize * 2)
-        var index = 0
-        while(index < vocabSize) {
-          if (syn0Modify(index) != 0) {
-            synOut.update(index, syn0Local.slice(index * vectorSize, (index + 1) * vectorSize))
+        // Only output modified vectors.
+        Iterator.tabulate(vocabSize) { index =>
+          if (syn0Modify(index) > 0) {
+            Some((index, syn0Local.slice(index * vectorSize, (index + 1) * vectorSize)))
+          } else {
+            None
           }
-          if (syn1Modify(index) != 0) {
-            synOut.update(index + vocabSize,
-              syn1Local.slice(index * vectorSize, (index + 1) * vectorSize))
+        }.flatten ++ Iterator.tabulate(vocabSize) { index =>
+          if (syn1Modify(index) > 0) {
+            Some((index + vocabSize, syn1Local.slice(index * vectorSize, (index + 1) * vectorSize)))
+          } else {
+            None
           }
-          index += 1
-        }
-        Iterator(synOut)
+        }.flatten
       }
-      val synAgg = partial.flatMap(x => x).reduceByKey { case (v1, v2) =>
+      val synAgg = partial.reduceByKey { case (v1, v2) =>
           blas.saxpy(vectorSize, 1.0f, v2, 1, v1, 1)
           v1
       }.collect()
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala
index 9cab49f6ed1f0..28179fbc450c0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala
@@ -20,14 +20,14 @@ package org.apache.spark.mllib.random
 import cern.jet.random.Poisson
 import cern.jet.random.engine.DRand
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.util.random.{XORShiftRandom, Pseudorandom}
 
 /**
- * :: Experimental ::
+ * :: DeveloperApi ::
  * Trait for random data generators that generate i.i.d. data.
  */
-@Experimental
+@DeveloperApi
 trait RandomDataGenerator[T] extends Pseudorandom with Serializable {
 
   /**
@@ -43,10 +43,10 @@ trait RandomDataGenerator[T] extends Pseudorandom with Serializable {
 }
 
 /**
- * :: Experimental ::
+ * :: DeveloperApi ::
  * Generates i.i.d. samples from U[0.0, 1.0]
  */
-@Experimental
+@DeveloperApi
 class UniformGenerator extends RandomDataGenerator[Double] {
 
   // XORShiftRandom for better performance. Thread safety isn't necessary here.
@@ -62,10 +62,10 @@ class UniformGenerator extends RandomDataGenerator[Double] {
 }
 
 /**
- * :: Experimental ::
+ * :: DeveloperApi ::
  * Generates i.i.d. samples from the standard normal distribution.
  */
-@Experimental
+@DeveloperApi
 class StandardNormalGenerator extends RandomDataGenerator[Double] {
 
   // XORShiftRandom for better performance. Thread safety isn't necessary here.
@@ -81,12 +81,12 @@ class StandardNormalGenerator extends RandomDataGenerator[Double] {
 }
 
 /**
- * :: Experimental ::
+ * :: DeveloperApi ::
  * Generates i.i.d. samples from the Poisson distribution with the given mean.
  *
  * @param mean mean for the Poisson distribution.
  */
-@Experimental
+@DeveloperApi
 class PoissonGenerator(val mean: Double) extends RandomDataGenerator[Double] {
 
   private var rng = new Poisson(mean, new DRand)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
index 36270369526cd..c5f4b084321f7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
@@ -20,9 +20,10 @@ package org.apache.spark.mllib.random
 import scala.reflect.ClassTag
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.api.java.{JavaDoubleRDD, JavaRDD, JavaSparkContext}
 import org.apache.spark.mllib.linalg.Vector
-import org.apache.spark.mllib.rdd.{RandomVectorRDD, RandomRDD}
+import org.apache.spark.mllib.rdd.{RandomRDD, RandomVectorRDD}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.util.Utils
 
@@ -34,335 +35,279 @@ import org.apache.spark.util.Utils
 object RandomRDDs {
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples from the uniform distribution on [0.0, 1.0].
+   * Generates an RDD comprised of i.i.d. samples from the uniform distribution `U(0.0, 1.0)`.
    *
-   * To transform the distribution in the generated RDD from U[0.0, 1.0] to U[a, b], use
-   * `RandomRDDGenerators.uniformRDD(sc, n, p, seed).map(v => a + (b - a) * v)`.
+   * To transform the distribution in the generated RDD from `U(0.0, 1.0)` to `U(a, b)`, use
+   * `RandomRDDs.uniformRDD(sc, n, p, seed).map(v => a + (b - a) * v)`.
    *
    * @param sc SparkContext used to create the RDD.
    * @param size Size of the RDD.
-   * @param numPartitions Number of partitions in the RDD.
-   * @param seed Seed for the RNG that generates the seed for the generator in each partition.
-   * @return RDD[Double] comprised of i.i.d. samples ~ U[0.0, 1.0].
+   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`).
+   * @param seed Random seed (default: a random long integer).
+   * @return RDD[Double] comprised of i.i.d. samples ~ `U(0.0, 1.0)`.
    */
-  @Experimental
-  def uniformRDD(sc: SparkContext, size: Long, numPartitions: Int, seed: Long): RDD[Double] = {
+  def uniformRDD(
+      sc: SparkContext,
+      size: Long,
+      numPartitions: Int = 0,
+      seed: Long = Utils.random.nextLong()): RDD[Double] = {
     val uniform = new UniformGenerator()
-    randomRDD(sc, uniform,  size, numPartitions, seed)
+    randomRDD(sc, uniform,  size, numPartitionsOrDefault(sc, numPartitions), seed)
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples from the uniform distribution on [0.0, 1.0].
-   *
-   * To transform the distribution in the generated RDD from U[0.0, 1.0] to U[a, b], use
-   * `RandomRDDGenerators.uniformRDD(sc, n, p).map(v => a + (b - a) * v)`.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param size Size of the RDD.
-   * @param numPartitions Number of partitions in the RDD.
-   * @return RDD[Double] comprised of i.i.d. samples ~ U[0.0, 1.0].
+   * Java-friendly version of [[RandomRDDs#uniformRDD]].
    */
-  @Experimental
-  def uniformRDD(sc: SparkContext, size: Long, numPartitions: Int): RDD[Double] = {
-    uniformRDD(sc, size, numPartitions, Utils.random.nextLong)
+  def uniformJavaRDD(
+      jsc: JavaSparkContext,
+      size: Long,
+      numPartitions: Int,
+      seed: Long): JavaDoubleRDD = {
+    JavaDoubleRDD.fromRDD(uniformRDD(jsc.sc, size, numPartitions, seed))
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples from the uniform distribution on [0.0, 1.0].
-   * sc.defaultParallelism used for the number of partitions in the RDD.
-   *
-   * To transform the distribution in the generated RDD from U[0.0, 1.0] to U[a, b], use
-   * `RandomRDDGenerators.uniformRDD(sc, n).map(v => a + (b - a) * v)`.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param size Size of the RDD.
-   * @return RDD[Double] comprised of i.i.d. samples ~ U[0.0, 1.0].
+   * [[RandomRDDs#uniformJavaRDD]] with the default seed.
    */
-  @Experimental
-  def uniformRDD(sc: SparkContext, size: Long): RDD[Double] = {
-    uniformRDD(sc, size, sc.defaultParallelism, Utils.random.nextLong)
+  def uniformJavaRDD(jsc: JavaSparkContext, size: Long, numPartitions: Int): JavaDoubleRDD = {
+    JavaDoubleRDD.fromRDD(uniformRDD(jsc.sc, size, numPartitions))
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
-   *
-   * To transform the distribution in the generated RDD from standard normal to some other normal
-   * N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n, p, seed).map(v => mean + sigma * v)`.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param size Size of the RDD.
-   * @param numPartitions Number of partitions in the RDD.
-   * @param seed Seed for the RNG that generates the seed for the generator in each partition.
-   * @return RDD[Double] comprised of i.i.d. samples ~ N(0.0, 1.0).
+   * [[RandomRDDs#uniformJavaRDD]] with the default number of partitions and the default seed.
    */
-  @Experimental
-  def normalRDD(sc: SparkContext, size: Long, numPartitions: Int, seed: Long): RDD[Double] = {
-    val normal = new StandardNormalGenerator()
-    randomRDD(sc, normal, size, numPartitions, seed)
+  def uniformJavaRDD(jsc: JavaSparkContext, size: Long): JavaDoubleRDD = {
+    JavaDoubleRDD.fromRDD(uniformRDD(jsc.sc, size))
   }
 
   /**
-   * :: Experimental ::
    * Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
    *
    * To transform the distribution in the generated RDD from standard normal to some other normal
-   * N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n, p).map(v => mean + sigma * v)`.
+   * `N(mean, sigma^2^)`, use `RandomRDDs.normalRDD(sc, n, p, seed).map(v => mean + sigma * v)`.
    *
    * @param sc SparkContext used to create the RDD.
    * @param size Size of the RDD.
-   * @param numPartitions Number of partitions in the RDD.
+   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`).
+   * @param seed Random seed (default: a random long integer).
    * @return RDD[Double] comprised of i.i.d. samples ~ N(0.0, 1.0).
    */
-  @Experimental
-  def normalRDD(sc: SparkContext, size: Long, numPartitions: Int): RDD[Double] = {
-    normalRDD(sc, size, numPartitions, Utils.random.nextLong)
+  def normalRDD(
+      sc: SparkContext,
+      size: Long,
+      numPartitions: Int = 0,
+      seed: Long = Utils.random.nextLong()): RDD[Double] = {
+    val normal = new StandardNormalGenerator()
+    randomRDD(sc, normal, size, numPartitionsOrDefault(sc, numPartitions), seed)
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
-   * sc.defaultParallelism used for the number of partitions in the RDD.
-   *
-   * To transform the distribution in the generated RDD from standard normal to some other normal
-   * N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n).map(v => mean + sigma * v)`.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param size Size of the RDD.
-   * @return RDD[Double] comprised of i.i.d. samples ~ N(0.0, 1.0).
+   * Java-friendly version of [[RandomRDDs#normalRDD]].
    */
-  @Experimental
-  def normalRDD(sc: SparkContext, size: Long): RDD[Double] = {
-    normalRDD(sc, size, sc.defaultParallelism, Utils.random.nextLong)
+  def normalJavaRDD(
+      jsc: JavaSparkContext,
+      size: Long,
+      numPartitions: Int,
+      seed: Long): JavaDoubleRDD = {
+    JavaDoubleRDD.fromRDD(normalRDD(jsc.sc, size, numPartitions, seed))
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param mean Mean, or lambda, for the Poisson distribution.
-   * @param size Size of the RDD.
-   * @param numPartitions Number of partitions in the RDD.
-   * @param seed Seed for the RNG that generates the seed for the generator in each partition.
-   * @return RDD[Double] comprised of i.i.d. samples ~ Pois(mean).
+   * [[RandomRDDs#normalJavaRDD]] with the default seed.
    */
-  @Experimental
-  def poissonRDD(sc: SparkContext,
-      mean: Double,
-      size: Long,
-      numPartitions: Int,
-      seed: Long): RDD[Double] = {
-    val poisson = new PoissonGenerator(mean)
-    randomRDD(sc, poisson, size, numPartitions, seed)
+  def normalJavaRDD(jsc: JavaSparkContext, size: Long, numPartitions: Int): JavaDoubleRDD = {
+    JavaDoubleRDD.fromRDD(normalRDD(jsc.sc, size, numPartitions))
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param mean Mean, or lambda, for the Poisson distribution.
-   * @param size Size of the RDD.
-   * @param numPartitions Number of partitions in the RDD.
-   * @return RDD[Double] comprised of i.i.d. samples ~ Pois(mean).
+   * [[RandomRDDs#normalJavaRDD]] with the default number of partitions and the default seed.
    */
-  @Experimental
-  def poissonRDD(sc: SparkContext, mean: Double, size: Long, numPartitions: Int): RDD[Double] = {
-    poissonRDD(sc, mean, size, numPartitions, Utils.random.nextLong)
+  def normalJavaRDD(jsc: JavaSparkContext, size: Long): JavaDoubleRDD = {
+    JavaDoubleRDD.fromRDD(normalRDD(jsc.sc, size))
   }
 
   /**
-   * :: Experimental ::
    * Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean.
-   * sc.defaultParallelism used for the number of partitions in the RDD.
    *
    * @param sc SparkContext used to create the RDD.
    * @param mean Mean, or lambda, for the Poisson distribution.
    * @param size Size of the RDD.
+   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`).
+   * @param seed Random seed (default: a random long integer).
    * @return RDD[Double] comprised of i.i.d. samples ~ Pois(mean).
    */
-  @Experimental
-  def poissonRDD(sc: SparkContext, mean: Double, size: Long): RDD[Double] = {
-    poissonRDD(sc, mean, size, sc.defaultParallelism, Utils.random.nextLong)
+  def poissonRDD(
+      sc: SparkContext,
+      mean: Double,
+      size: Long,
+      numPartitions: Int = 0,
+      seed: Long = Utils.random.nextLong()): RDD[Double] = {
+    val poisson = new PoissonGenerator(mean)
+    randomRDD(sc, poisson, size, numPartitionsOrDefault(sc, numPartitions), seed)
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples produced by the input DistributionGenerator.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param generator DistributionGenerator used to populate the RDD.
-   * @param size Size of the RDD.
-   * @param numPartitions Number of partitions in the RDD.
-   * @param seed Seed for the RNG that generates the seed for the generator in each partition.
-   * @return RDD[Double] comprised of i.i.d. samples produced by generator.
+   * Java-friendly version of [[RandomRDDs#poissonRDD]].
    */
-  @Experimental
-  def randomRDD[T: ClassTag](sc: SparkContext,
-      generator: RandomDataGenerator[T],
+  def poissonJavaRDD(
+      jsc: JavaSparkContext,
+      mean: Double,
       size: Long,
       numPartitions: Int,
-      seed: Long): RDD[T] = {
-    new RandomRDD[T](sc, size, numPartitions, generator, seed)
+      seed: Long): JavaDoubleRDD = {
+    JavaDoubleRDD.fromRDD(poissonRDD(jsc.sc, mean, size, numPartitions, seed))
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples produced by the input DistributionGenerator.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param generator DistributionGenerator used to populate the RDD.
-   * @param size Size of the RDD.
-   * @param numPartitions Number of partitions in the RDD.
-   * @return RDD[Double] comprised of i.i.d. samples produced by generator.
+   * [[RandomRDDs#poissonJavaRDD]] with the default seed.
    */
-  @Experimental
-  def randomRDD[T: ClassTag](sc: SparkContext,
-      generator: RandomDataGenerator[T],
+  def poissonJavaRDD(
+      jsc: JavaSparkContext,
+      mean: Double,
       size: Long,
-      numPartitions: Int): RDD[T] = {
-    randomRDD[T](sc, generator, size, numPartitions, Utils.random.nextLong)
+      numPartitions: Int): JavaDoubleRDD = {
+    JavaDoubleRDD.fromRDD(poissonRDD(jsc.sc, mean, size, numPartitions))
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD comprised of i.i.d. samples produced by the input DistributionGenerator.
-   * sc.defaultParallelism used for the number of partitions in the RDD.
+   * [[RandomRDDs#poissonJavaRDD]] with the default number of partitions and the default seed.
+   */
+  def poissonJavaRDD(jsc: JavaSparkContext, mean: Double, size: Long): JavaDoubleRDD = {
+    JavaDoubleRDD.fromRDD(poissonRDD(jsc.sc, mean, size))
+  }
+
+  /**
+   * :: DeveloperApi ::
+   * Generates an RDD comprised of i.i.d. samples produced by the input RandomDataGenerator.
    *
    * @param sc SparkContext used to create the RDD.
-   * @param generator DistributionGenerator used to populate the RDD.
+   * @param generator RandomDataGenerator used to populate the RDD.
    * @param size Size of the RDD.
+   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`).
+   * @param seed Random seed (default: a random long integer).
    * @return RDD[Double] comprised of i.i.d. samples produced by generator.
    */
-  @Experimental
-  def randomRDD[T: ClassTag](sc: SparkContext,
+  @DeveloperApi
+  def randomRDD[T: ClassTag](
+      sc: SparkContext,
       generator: RandomDataGenerator[T],
-      size: Long): RDD[T] = {
-    randomRDD[T](sc, generator, size, sc.defaultParallelism, Utils.random.nextLong)
+      size: Long,
+      numPartitions: Int = 0,
+      seed: Long = Utils.random.nextLong()): RDD[T] = {
+    new RandomRDD[T](sc, size, numPartitionsOrDefault(sc, numPartitions), generator, seed)
   }
 
   // TODO Generate RDD[Vector] from multivariate distributions.
 
   /**
-   * :: Experimental ::
    * Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
-   * uniform distribution on [0.0 1.0].
+   * uniform distribution on `U(0.0, 1.0)`.
    *
    * @param sc SparkContext used to create the RDD.
    * @param numRows Number of Vectors in the RDD.
    * @param numCols Number of elements in each Vector.
    * @param numPartitions Number of partitions in the RDD.
    * @param seed Seed for the RNG that generates the seed for the generator in each partition.
-   * @return RDD[Vector] with vectors containing i.i.d samples ~ U[0.0, 1.0].
+   * @return RDD[Vector] with vectors containing i.i.d samples ~ `U(0.0, 1.0)`.
    */
-  @Experimental
-  def uniformVectorRDD(sc: SparkContext,
+  def uniformVectorRDD(
+      sc: SparkContext,
       numRows: Long,
       numCols: Int,
-      numPartitions: Int,
-      seed: Long): RDD[Vector] = {
+      numPartitions: Int = 0,
+      seed: Long = Utils.random.nextLong()): RDD[Vector] = {
     val uniform = new UniformGenerator()
-    randomVectorRDD(sc, uniform, numRows, numCols, numPartitions, seed)
+    randomVectorRDD(sc, uniform, numRows, numCols, numPartitionsOrDefault(sc, numPartitions), seed)
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
-   * uniform distribution on [0.0 1.0].
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param numRows Number of Vectors in the RDD.
-   * @param numCols Number of elements in each Vector.
-   * @param numPartitions Number of partitions in the RDD.
-   * @return RDD[Vector] with vectors containing i.i.d. samples ~ U[0.0, 1.0].
+   * Java-friendly version of [[RandomRDDs#uniformVectorRDD]].
    */
-  @Experimental
-  def uniformVectorRDD(sc: SparkContext,
+  def uniformJavaVectorRDD(
+      jsc: JavaSparkContext,
       numRows: Long,
       numCols: Int,
-      numPartitions: Int): RDD[Vector] = {
-    uniformVectorRDD(sc, numRows, numCols, numPartitions, Utils.random.nextLong)
+      numPartitions: Int,
+      seed: Long): JavaRDD[Vector] = {
+    uniformVectorRDD(jsc.sc, numRows, numCols, numPartitions, seed).toJavaRDD()
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
-   * uniform distribution on [0.0 1.0].
-   * sc.defaultParallelism used for the number of partitions in the RDD.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param numRows Number of Vectors in the RDD.
-   * @param numCols Number of elements in each Vector.
-   * @return RDD[Vector] with vectors containing i.i.d. samples ~ U[0.0, 1.0].
+   * [[RandomRDDs#uniformJavaVectorRDD]] with the default seed.
    */
-  @Experimental
-  def uniformVectorRDD(sc: SparkContext, numRows: Long, numCols: Int): RDD[Vector] = {
-    uniformVectorRDD(sc, numRows, numCols, sc.defaultParallelism, Utils.random.nextLong)
+  def uniformJavaVectorRDD(
+      jsc: JavaSparkContext,
+      numRows: Long,
+      numCols: Int,
+      numPartitions: Int): JavaRDD[Vector] = {
+    uniformVectorRDD(jsc.sc, numRows, numCols, numPartitions).toJavaRDD()
+  }
+
+  /**
+   * [[RandomRDDs#uniformJavaVectorRDD]] with the default number of partitions and the default seed.
+   */
+  def uniformJavaVectorRDD(
+      jsc: JavaSparkContext,
+      numRows: Long,
+      numCols: Int): JavaRDD[Vector] = {
+    uniformVectorRDD(jsc.sc, numRows, numCols).toJavaRDD()
   }
 
   /**
-   * :: Experimental ::
    * Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
    * standard normal distribution.
    *
    * @param sc SparkContext used to create the RDD.
    * @param numRows Number of Vectors in the RDD.
    * @param numCols Number of elements in each Vector.
-   * @param numPartitions Number of partitions in the RDD.
-   * @param seed Seed for the RNG that generates the seed for the generator in each partition.
-   * @return RDD[Vector] with vectors containing i.i.d. samples ~ N(0.0, 1.0).
+   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`).
+   * @param seed Random seed (default: a random long integer).
+   * @return RDD[Vector] with vectors containing i.i.d. samples ~ `N(0.0, 1.0)`.
+   */
+  def normalVectorRDD(
+      sc: SparkContext,
+      numRows: Long,
+      numCols: Int,
+      numPartitions: Int = 0,
+      seed: Long = Utils.random.nextLong()): RDD[Vector] = {
+    val normal = new StandardNormalGenerator()
+    randomVectorRDD(sc, normal, numRows, numCols, numPartitionsOrDefault(sc, numPartitions), seed)
+  }
+
+  /**
+   * Java-friendly version of [[RandomRDDs#normalVectorRDD]].
    */
-  @Experimental
-  def normalVectorRDD(sc: SparkContext,
+  def normalJavaVectorRDD(
+      jsc: JavaSparkContext,
       numRows: Long,
       numCols: Int,
       numPartitions: Int,
-      seed: Long): RDD[Vector] = {
-    val uniform = new StandardNormalGenerator()
-    randomVectorRDD(sc, uniform, numRows, numCols, numPartitions, seed)
+      seed: Long): JavaRDD[Vector] = {
+    normalVectorRDD(jsc.sc, numRows, numCols, numPartitions, seed).toJavaRDD()
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
-   * standard normal distribution.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param numRows Number of Vectors in the RDD.
-   * @param numCols Number of elements in each Vector.
-   * @param numPartitions Number of partitions in the RDD.
-   * @return RDD[Vector] with vectors containing i.i.d. samples ~ N(0.0, 1.0).
+   * [[RandomRDDs#normalJavaVectorRDD]] with the default seed.
    */
-  @Experimental
-  def normalVectorRDD(sc: SparkContext,
+  def normalJavaVectorRDD(
+      jsc: JavaSparkContext,
       numRows: Long,
       numCols: Int,
-      numPartitions: Int): RDD[Vector] = {
-    normalVectorRDD(sc, numRows, numCols, numPartitions, Utils.random.nextLong)
+      numPartitions: Int): JavaRDD[Vector] = {
+    normalVectorRDD(jsc.sc, numRows, numCols, numPartitions).toJavaRDD()
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
-   * standard normal distribution.
-   * sc.defaultParallelism used for the number of partitions in the RDD.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param numRows Number of Vectors in the RDD.
-   * @param numCols Number of elements in each Vector.
-   * @return RDD[Vector] with vectors containing i.i.d. samples ~ N(0.0, 1.0).
+   * [[RandomRDDs#normalJavaVectorRDD]] with the default number of partitions and the default seed.
    */
-  @Experimental
-  def normalVectorRDD(sc: SparkContext, numRows: Long, numCols: Int): RDD[Vector] = {
-    normalVectorRDD(sc, numRows, numCols, sc.defaultParallelism, Utils.random.nextLong)
+  def normalJavaVectorRDD(
+      jsc: JavaSparkContext,
+      numRows: Long,
+      numCols: Int): JavaRDD[Vector] = {
+    normalVectorRDD(jsc.sc, numRows, numCols).toJavaRDD()
   }
 
   /**
-   * :: Experimental ::
    * Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
    * Poisson distribution with the input mean.
    *
@@ -370,124 +315,85 @@ object RandomRDDs {
    * @param mean Mean, or lambda, for the Poisson distribution.
    * @param numRows Number of Vectors in the RDD.
    * @param numCols Number of elements in each Vector.
-   * @param numPartitions Number of partitions in the RDD.
-   * @param seed Seed for the RNG that generates the seed for the generator in each partition.
+   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`)
+   * @param seed Random seed (default: a random long integer).
    * @return RDD[Vector] with vectors containing i.i.d. samples ~ Pois(mean).
    */
-  @Experimental
-  def poissonVectorRDD(sc: SparkContext,
+  def poissonVectorRDD(
+      sc: SparkContext,
       mean: Double,
       numRows: Long,
       numCols: Int,
-      numPartitions: Int,
-      seed: Long): RDD[Vector] = {
+      numPartitions: Int = 0,
+      seed: Long = Utils.random.nextLong()): RDD[Vector] = {
     val poisson = new PoissonGenerator(mean)
-    randomVectorRDD(sc, poisson, numRows, numCols, numPartitions, seed)
+    randomVectorRDD(sc, poisson, numRows, numCols, numPartitionsOrDefault(sc, numPartitions), seed)
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
-   * Poisson distribution with the input mean.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param mean Mean, or lambda, for the Poisson distribution.
-   * @param numRows Number of Vectors in the RDD.
-   * @param numCols Number of elements in each Vector.
-   * @param numPartitions Number of partitions in the RDD.
-   * @return RDD[Vector] with vectors containing i.i.d. samples ~ Pois(mean).
+   * Java-friendly version of [[RandomRDDs#poissonVectorRDD]].
    */
-  @Experimental
-  def poissonVectorRDD(sc: SparkContext,
+  def poissonJavaVectorRDD(
+      jsc: JavaSparkContext,
       mean: Double,
       numRows: Long,
       numCols: Int,
-      numPartitions: Int): RDD[Vector] = {
-    poissonVectorRDD(sc, mean, numRows, numCols, numPartitions, Utils.random.nextLong)
+      numPartitions: Int,
+      seed: Long): JavaRDD[Vector] = {
+    poissonVectorRDD(jsc.sc, mean, numRows, numCols, numPartitions, seed).toJavaRDD()
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
-   * Poisson distribution with the input mean.
-   * sc.defaultParallelism used for the number of partitions in the RDD.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param mean Mean, or lambda, for the Poisson distribution.
-   * @param numRows Number of Vectors in the RDD.
-   * @param numCols Number of elements in each Vector.
-   * @return RDD[Vector] with vectors containing i.i.d. samples ~ Pois(mean).
+   * [[RandomRDDs#poissonJavaVectorRDD]] with the default seed.
    */
-  @Experimental
-  def poissonVectorRDD(sc: SparkContext,
+  def poissonJavaVectorRDD(
+      jsc: JavaSparkContext,
       mean: Double,
       numRows: Long,
-      numCols: Int): RDD[Vector] = {
-    poissonVectorRDD(sc, mean, numRows, numCols, sc.defaultParallelism, Utils.random.nextLong)
+      numCols: Int,
+      numPartitions: Int): JavaRDD[Vector] = {
+    poissonVectorRDD(jsc.sc, mean, numRows, numCols, numPartitions).toJavaRDD()
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the
-   * input DistributionGenerator.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param generator DistributionGenerator used to populate the RDD.
-   * @param numRows Number of Vectors in the RDD.
-   * @param numCols Number of elements in each Vector.
-   * @param numPartitions Number of partitions in the RDD.
-   * @param seed Seed for the RNG that generates the seed for the generator in each partition.
-   * @return RDD[Vector] with vectors containing i.i.d. samples produced by generator.
+   * [[RandomRDDs#poissonJavaVectorRDD]] with the default number of partitions and the default seed.
    */
-  @Experimental
-  def randomVectorRDD(sc: SparkContext,
-      generator: RandomDataGenerator[Double],
+  def poissonJavaVectorRDD(
+      jsc: JavaSparkContext,
+      mean: Double,
       numRows: Long,
-      numCols: Int,
-      numPartitions: Int,
-      seed: Long): RDD[Vector] = {
-    new RandomVectorRDD(sc, numRows, numCols, numPartitions, generator, seed)
+      numCols: Int): JavaRDD[Vector] = {
+    poissonVectorRDD(jsc.sc, mean, numRows, numCols).toJavaRDD()
   }
 
   /**
-   * :: Experimental ::
+   * :: DeveloperApi ::
    * Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the
-   * input DistributionGenerator.
+   * input RandomDataGenerator.
    *
    * @param sc SparkContext used to create the RDD.
-   * @param generator DistributionGenerator used to populate the RDD.
+   * @param generator RandomDataGenerator used to populate the RDD.
    * @param numRows Number of Vectors in the RDD.
    * @param numCols Number of elements in each Vector.
-   * @param numPartitions Number of partitions in the RDD.
+   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`).
+   * @param seed Random seed (default: a random long integer).
    * @return RDD[Vector] with vectors containing i.i.d. samples produced by generator.
    */
-  @Experimental
+  @DeveloperApi
   def randomVectorRDD(sc: SparkContext,
       generator: RandomDataGenerator[Double],
       numRows: Long,
       numCols: Int,
-      numPartitions: Int): RDD[Vector] = {
-    randomVectorRDD(sc, generator, numRows, numCols, numPartitions, Utils.random.nextLong)
+      numPartitions: Int = 0,
+      seed: Long = Utils.random.nextLong()): RDD[Vector] = {
+    new RandomVectorRDD(
+      sc, numRows, numCols, numPartitionsOrDefault(sc, numPartitions), generator, seed)
   }
 
   /**
-   * :: Experimental ::
-   * Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the
-   * input DistributionGenerator.
-   * sc.defaultParallelism used for the number of partitions in the RDD.
-   *
-   * @param sc SparkContext used to create the RDD.
-   * @param generator DistributionGenerator used to populate the RDD.
-   * @param numRows Number of Vectors in the RDD.
-   * @param numCols Number of elements in each Vector.
-   * @return RDD[Vector] with vectors containing i.i.d. samples produced by generator.
+   * Returns `numPartitions` if it is positive, or `sc.defaultParallelism` otherwise.
    */
-  @Experimental
-  def randomVectorRDD(sc: SparkContext,
-      generator: RandomDataGenerator[Double],
-      numRows: Long,
-      numCols: Int): RDD[Vector] = {
-    randomVectorRDD(sc, generator, numRows, numCols,
-      sc.defaultParallelism, Utils.random.nextLong)
+  private def numPartitionsOrDefault(sc: SparkContext, numPartitions: Int): Int = {
+    if (numPartitions > 0) numPartitions else sc.defaultMinPartitions
   }
 }
diff --git a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
new file mode 100644
index 0000000000000..a725736ca1a58
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.random;
+
+import com.google.common.collect.Lists;
+import org.apache.spark.api.java.JavaRDD;
+import org.junit.Assert;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaDoubleRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.linalg.Vector;
+import static org.apache.spark.mllib.random.RandomRDDs.*;
+
+public class JavaRandomRDDsSuite {
+  private transient JavaSparkContext sc;
+
+  @Before
+  public void setUp() {
+    sc = new JavaSparkContext("local", "JavaRandomRDDsSuite");
+  }
+
+  @After
+  public void tearDown() {
+    sc.stop();
+    sc = null;
+  }
+
+  @Test
+  public void testUniformRDD() {
+    long m = 1000L;
+    int p = 2;
+    long seed = 1L;
+    JavaDoubleRDD rdd1 = uniformJavaRDD(sc, m);
+    JavaDoubleRDD rdd2 = uniformJavaRDD(sc, m, p);
+    JavaDoubleRDD rdd3 = uniformJavaRDD(sc, m, p, seed);
+    for (JavaDoubleRDD rdd: Lists.newArrayList(rdd1, rdd2, rdd3)) {
+      Assert.assertEquals(m, rdd.count());
+    }
+  }
+
+  @Test
+  public void testNormalRDD() {
+    long m = 1000L;
+    int p = 2;
+    long seed = 1L;
+    JavaDoubleRDD rdd1 = normalJavaRDD(sc, m);
+    JavaDoubleRDD rdd2 = normalJavaRDD(sc, m, p);
+    JavaDoubleRDD rdd3 = normalJavaRDD(sc, m, p, seed);
+    for (JavaDoubleRDD rdd: Lists.newArrayList(rdd1, rdd2, rdd3)) {
+      Assert.assertEquals(m, rdd.count());
+    }
+  }
+
+  @Test
+  public void testPoissonRDD() {
+    double mean = 2.0;
+    long m = 1000L;
+    int p = 2;
+    long seed = 1L;
+    JavaDoubleRDD rdd1 = poissonJavaRDD(sc, mean, m);
+    JavaDoubleRDD rdd2 = poissonJavaRDD(sc, mean, m, p);
+    JavaDoubleRDD rdd3 = poissonJavaRDD(sc, mean, m, p, seed);
+    for (JavaDoubleRDD rdd: Lists.newArrayList(rdd1, rdd2, rdd3)) {
+      Assert.assertEquals(m, rdd.count());
+    }
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testUniformVectorRDD() {
+    long m = 100L;
+    int n = 10;
+    int p = 2;
+    long seed = 1L;
+    JavaRDD<Vector> rdd1 = uniformJavaVectorRDD(sc, m, n);
+    JavaRDD<Vector> rdd2 = uniformJavaVectorRDD(sc, m, n, p);
+    JavaRDD<Vector> rdd3 = uniformJavaVectorRDD(sc, m, n, p, seed);
+    for (JavaRDD<Vector> rdd: Lists.newArrayList(rdd1, rdd2, rdd3)) {
+      Assert.assertEquals(m, rdd.count());
+      Assert.assertEquals(n, rdd.first().size());
+    }
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testNormalVectorRDD() {
+    long m = 100L;
+    int n = 10;
+    int p = 2;
+    long seed = 1L;
+    JavaRDD<Vector> rdd1 = normalJavaVectorRDD(sc, m, n);
+    JavaRDD<Vector> rdd2 = normalJavaVectorRDD(sc, m, n, p);
+    JavaRDD<Vector> rdd3 = normalJavaVectorRDD(sc, m, n, p, seed);
+    for (JavaRDD<Vector> rdd: Lists.newArrayList(rdd1, rdd2, rdd3)) {
+      Assert.assertEquals(m, rdd.count());
+      Assert.assertEquals(n, rdd.first().size());
+    }
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testPoissonVectorRDD() {
+    double mean = 2.0;
+    long m = 100L;
+    int n = 10;
+    int p = 2;
+    long seed = 1L;
+    JavaRDD<Vector> rdd1 = poissonJavaVectorRDD(sc, mean, m, n);
+    JavaRDD<Vector> rdd2 = poissonJavaVectorRDD(sc, mean, m, n, p);
+    JavaRDD<Vector> rdd3 = poissonJavaVectorRDD(sc, mean, m, n, p, seed);
+    for (JavaRDD<Vector> rdd: Lists.newArrayList(rdd1, rdd2, rdd3)) {
+      Assert.assertEquals(m, rdd.count());
+      Assert.assertEquals(n, rdd.first().size());
+    }
+  }
+}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
index 06cdd04f5fdae..80989bc074e84 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
@@ -21,6 +21,7 @@ import scala.util.Random
 
 import org.scalatest.FunSuite
 
+import org.apache.spark.SparkException
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.{LocalClusterSparkContext, LocalSparkContext}
@@ -95,6 +96,33 @@ class NaiveBayesSuite extends FunSuite with LocalSparkContext {
     // Test prediction on Array.
     validatePrediction(validationData.map(row => model.predict(row.features)), validationData)
   }
+
+  test("detect negative values") {
+    val dense = Seq(
+      LabeledPoint(1.0, Vectors.dense(1.0)),
+      LabeledPoint(0.0, Vectors.dense(-1.0)),
+      LabeledPoint(1.0, Vectors.dense(1.0)),
+      LabeledPoint(1.0, Vectors.dense(0.0)))
+    intercept[SparkException] {
+      NaiveBayes.train(sc.makeRDD(dense, 2))
+    }
+    val sparse = Seq(
+      LabeledPoint(1.0, Vectors.sparse(1, Array(0), Array(1.0))),
+      LabeledPoint(0.0, Vectors.sparse(1, Array(0), Array(-1.0))),
+      LabeledPoint(1.0, Vectors.sparse(1, Array(0), Array(1.0))),
+      LabeledPoint(1.0, Vectors.sparse(1, Array.empty, Array.empty)))
+    intercept[SparkException] {
+      NaiveBayes.train(sc.makeRDD(sparse, 2))
+    }
+    val nan = Seq(
+      LabeledPoint(1.0, Vectors.sparse(1, Array(0), Array(1.0))),
+      LabeledPoint(0.0, Vectors.sparse(1, Array(0), Array(Double.NaN))),
+      LabeledPoint(1.0, Vectors.sparse(1, Array(0), Array(1.0))),
+      LabeledPoint(1.0, Vectors.sparse(1, Array.empty, Array.empty)))
+    intercept[SparkException] {
+      NaiveBayes.train(sc.makeRDD(nan, 2))
+    }
+  }
 }
 
 class NaiveBayesClusterSuite extends FunSuite with LocalClusterSparkContext {
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
index 45e25eecf508e..03b71301e9ab1 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
@@ -17,20 +17,19 @@
 
 package org.apache.spark.mllib.regression
 
-import java.io.File
-import java.nio.charset.Charset
-
 import scala.collection.mutable.ArrayBuffer
 
-import com.google.common.io.Files
 import org.scalatest.FunSuite
 
 import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.mllib.util.{LinearDataGenerator, LocalSparkContext}
-import org.apache.spark.streaming.{Milliseconds, StreamingContext}
-import org.apache.spark.util.Utils
+import org.apache.spark.mllib.util.LinearDataGenerator
+import org.apache.spark.streaming.dstream.DStream
+import org.apache.spark.streaming.TestSuiteBase
+
+class StreamingLinearRegressionSuite extends FunSuite with TestSuiteBase {
 
-class StreamingLinearRegressionSuite extends FunSuite with LocalSparkContext {
+  // use longer wait time to ensure job completion
+  override def maxWaitTimeMillis = 20000
 
   // Assert that two values are equal within tolerance epsilon
   def assertEqual(v1: Double, v2: Double, epsilon: Double) {
@@ -49,35 +48,25 @@ class StreamingLinearRegressionSuite extends FunSuite with LocalSparkContext {
   }
 
   // Test if we can accurately learn Y = 10*X1 + 10*X2 on streaming data
-  test("streaming linear regression parameter accuracy") {
-
-    val testDir = Files.createTempDir()
-    val numBatches = 10
-    val batchDuration = Milliseconds(1000)
-    val ssc = new StreamingContext(sc, batchDuration)
-    val data = ssc.textFileStream(testDir.toString).map(LabeledPoint.parse)
+  test("parameter accuracy") {
+    // create model
     val model = new StreamingLinearRegressionWithSGD()
       .setInitialWeights(Vectors.dense(0.0, 0.0))
       .setStepSize(0.1)
-      .setNumIterations(50)
-
-    model.trainOn(data)
-
-    ssc.start()
+      .setNumIterations(25)
 
-    // write data to a file stream
-    for (i <- 0 until numBatches) {
-      val samples = LinearDataGenerator.generateLinearInput(
-        0.0, Array(10.0, 10.0), 100, 42 * (i + 1))
-      val file = new File(testDir, i.toString)
-      Files.write(samples.map(x => x.toString).mkString("\n"), file, Charset.forName("UTF-8"))
-      Thread.sleep(batchDuration.milliseconds)
+    // generate sequence of simulated data
+    val numBatches = 10
+    val input = (0 until numBatches).map { i =>
+      LinearDataGenerator.generateLinearInput(0.0, Array(10.0, 10.0), 100, 42 * (i + 1))
     }
 
-    ssc.stop(stopSparkContext=false)
-
-    System.clearProperty("spark.driver.port")
-    Utils.deleteRecursively(testDir)
+    // apply model training to input stream
+    val ssc = setupStreams(input, (inputDStream: DStream[LabeledPoint]) => {
+      model.trainOn(inputDStream)
+      inputDStream.count()
+    })
+    runStreams(ssc, numBatches, numBatches)
 
     // check accuracy of final parameter estimates
     assertEqual(model.latestModel().intercept, 0.0, 0.1)
@@ -91,45 +80,63 @@ class StreamingLinearRegressionSuite extends FunSuite with LocalSparkContext {
   }
 
   // Test that parameter estimates improve when learning Y = 10*X1 on streaming data
-  test("streaming linear regression parameter convergence") {
-
-    val testDir = Files.createTempDir()
-    val batchDuration = Milliseconds(2000)
-    val ssc = new StreamingContext(sc, batchDuration)
-    val numBatches = 5
-    val data = ssc.textFileStream(testDir.toString()).map(LabeledPoint.parse)
+  test("parameter convergence") {
+    // create model
     val model = new StreamingLinearRegressionWithSGD()
       .setInitialWeights(Vectors.dense(0.0))
       .setStepSize(0.1)
-      .setNumIterations(50)
+      .setNumIterations(25)
 
-    model.trainOn(data)
-
-    ssc.start()
-
-    // write data to a file stream
-    val history = new ArrayBuffer[Double](numBatches)
-    for (i <- 0 until numBatches) {
-      val samples = LinearDataGenerator.generateLinearInput(0.0, Array(10.0), 100, 42 * (i + 1))
-      val file = new File(testDir, i.toString)
-      Files.write(samples.map(x => x.toString).mkString("\n"), file, Charset.forName("UTF-8"))
-      Thread.sleep(batchDuration.milliseconds)
-      // wait an extra few seconds to make sure the update finishes before new data arrive
-      Thread.sleep(4000)
-      history.append(math.abs(model.latestModel().weights(0) - 10.0))
+    // generate sequence of simulated data
+    val numBatches = 10
+    val input = (0 until numBatches).map { i =>
+      LinearDataGenerator.generateLinearInput(0.0, Array(10.0), 100, 42 * (i + 1))
     }
 
-    ssc.stop(stopSparkContext=false)
+    // create buffer to store intermediate fits
+    val history = new ArrayBuffer[Double](numBatches)
 
-    System.clearProperty("spark.driver.port")
-    Utils.deleteRecursively(testDir)
+    // apply model training to input stream, storing the intermediate results
+    // (we add a count to ensure the result is a DStream)
+    val ssc = setupStreams(input, (inputDStream: DStream[LabeledPoint]) => {
+      model.trainOn(inputDStream)
+      inputDStream.foreachRDD(x => history.append(math.abs(model.latestModel().weights(0) - 10.0)))
+      inputDStream.count()
+    })
+    runStreams(ssc, numBatches, numBatches)
 
+    // compute change in error
     val deltas = history.drop(1).zip(history.dropRight(1))
     // check error stability (it always either shrinks, or increases with small tol)
     assert(deltas.forall(x => (x._1 - x._2) <= 0.1))
     // check that error shrunk on at least 2 batches
     assert(deltas.map(x => if ((x._1 - x._2) < 0) 1 else 0).sum > 1)
-
   }
 
+  // Test predictions on a stream
+  test("predictions") {
+    // create model initialized with true weights
+    val model = new StreamingLinearRegressionWithSGD()
+      .setInitialWeights(Vectors.dense(10.0, 10.0))
+      .setStepSize(0.1)
+      .setNumIterations(25)
+
+    // generate sequence of simulated data for testing
+    val numBatches = 10
+    val nPoints = 100
+    val testInput = (0 until numBatches).map { i =>
+      LinearDataGenerator.generateLinearInput(0.0, Array(10.0, 10.0), nPoints, 42 * (i + 1))
+    }
+
+    // apply model predictions to test stream
+    val ssc = setupStreams(testInput, (inputDStream: DStream[LabeledPoint]) => {
+      model.predictOnValues(inputDStream.map(x => (x.label, x.features)))
+    })
+    // collect the output as (true, estimated) tuples
+    val output: Seq[Seq[(Double, Double)]] = runStreams(ssc, numBatches, numBatches)
+
+    // compute the mean absolute error and check that it's always less than 0.1
+    val errors = output.map(batch => batch.map(p => math.abs(p._1 - p._2)).sum / nPoints)
+    assert(errors.forall(x => x <= 0.1))
+  }
 }
diff --git a/pom.xml b/pom.xml
index ef12c8f1a5c49..7ed07ad7df88d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -420,7 +420,7 @@
       <dependency>
         <groupId>io.netty</groupId>
         <artifactId>netty-all</artifactId>
-        <version>4.0.17.Final</version>
+        <version>4.0.23.Final</version>
       </dependency>
       <dependency>
         <groupId>org.apache.derby</groupId>
@@ -1115,18 +1115,49 @@
     </profile>
 
     <profile>
-      <id>mapr</id>
+      <id>mapr3</id>
       <activation>
         <activeByDefault>false</activeByDefault>
       </activation>
       <properties>
         <hadoop.version>1.0.3-mapr-3.0.3</hadoop.version>
-        <yarn.version>2.3.0-mapr-4.0.0-beta</yarn.version>
-        <hbase.version>0.94.17-mapr-1403</hbase.version>
-        <zookeeper.version>3.4.5-mapr-1401</zookeeper.version>
+        <yarn.version>2.3.0-mapr-4.0.0-FCS</yarn.version>
+        <hbase.version>0.94.17-mapr-1405</hbase.version>
+        <zookeeper.version>3.4.5-mapr-1406</zookeeper.version>
       </properties>
     </profile>
 
+    <profile>
+      <id>mapr4</id>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <properties>
+        <hadoop.version>2.3.0-mapr-4.0.0-FCS</hadoop.version>
+        <yarn.version>2.3.0-mapr-4.0.0-FCS</yarn.version>
+        <hbase.version>0.94.17-mapr-1405-4.0.0-FCS</hbase.version>
+        <zookeeper.version>3.4.5-mapr-1406</zookeeper.version>
+      </properties>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.curator</groupId>
+          <artifactId>curator-recipes</artifactId>
+          <version>2.4.0</version>
+          <exclusions>
+            <exclusion>
+              <groupId>org.apache.zookeeper</groupId>
+              <artifactId>zookeeper</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.zookeeper</groupId>
+          <artifactId>zookeeper</artifactId>
+          <version>3.4.5-mapr-1406</version> 
+        </dependency>
+      </dependencies>
+    </profile>
+
     <!-- Build without Hadoop dependencies that are included in some runtime environments. -->
     <profile>
       <id>hadoop-provided</id>
@@ -1179,7 +1210,7 @@
     </profile>
 
     <profile>
-      <id>hive-thriftserver</id>
+      <id>hive</id>
       <activation>
         <activeByDefault>false</activeByDefault>
       </activation>
diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py
index 3f3b19053d32e..4dc1a4a912421 100644
--- a/python/pyspark/mllib/random.py
+++ b/python/pyspark/mllib/random.py
@@ -35,10 +35,10 @@ class RandomRDDs:
     def uniformRDD(sc, size, numPartitions=None, seed=None):
         """
         Generates an RDD comprised of i.i.d. samples from the
-        uniform distribution on [0.0, 1.0].
+        uniform distribution U(0.0, 1.0).
 
-        To transform the distribution in the generated RDD from U[0.0, 1.0]
-        to U[a, b], use
+        To transform the distribution in the generated RDD from U(0.0, 1.0)
+        to U(a, b), use
         C{RandomRDDs.uniformRDD(sc, n, p, seed)\
           .map(lambda v: a + (b - a) * v)}
 
@@ -60,11 +60,11 @@ def uniformRDD(sc, size, numPartitions=None, seed=None):
     @staticmethod
     def normalRDD(sc, size, numPartitions=None, seed=None):
         """
-        Generates an RDD comprised of i.i.d samples from the standard normal
+        Generates an RDD comprised of i.i.d. samples from the standard normal
         distribution.
 
         To transform the distribution in the generated RDD from standard normal
-        to some other normal N(mean, sigma), use
+        to some other normal N(mean, sigma^2), use
         C{RandomRDDs.normal(sc, n, p, seed)\
           .map(lambda v: mean + sigma * v)}
 
@@ -84,7 +84,7 @@ def normalRDD(sc, size, numPartitions=None, seed=None):
     @staticmethod
     def poissonRDD(sc, mean, size, numPartitions=None, seed=None):
         """
-        Generates an RDD comprised of i.i.d samples from the Poisson
+        Generates an RDD comprised of i.i.d. samples from the Poisson
         distribution with the input mean.
 
         >>> mean = 100.0
@@ -105,8 +105,8 @@ def poissonRDD(sc, mean, size, numPartitions=None, seed=None):
     @staticmethod
     def uniformVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
         """
-        Generates an RDD comprised of vectors containing i.i.d samples drawn
-        from the uniform distribution on [0.0 1.0].
+        Generates an RDD comprised of vectors containing i.i.d. samples drawn
+        from the uniform distribution U(0.0, 1.0).
 
         >>> import numpy as np
         >>> mat = np.matrix(RandomRDDs.uniformVectorRDD(sc, 10, 10).collect())
@@ -125,7 +125,7 @@ def uniformVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
     @staticmethod
     def normalVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
         """
-        Generates an RDD comprised of vectors containing i.i.d samples drawn
+        Generates an RDD comprised of vectors containing i.i.d. samples drawn
         from the standard normal distribution.
 
         >>> import numpy as np
@@ -145,7 +145,7 @@ def normalVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
     @staticmethod
     def poissonVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None):
         """
-        Generates an RDD comprised of vectors containing i.i.d samples drawn
+        Generates an RDD comprised of vectors containing i.i.d. samples drawn
         from the Poisson distribution with the input mean.
 
         >>> import numpy as np
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index c708b69cc1e31..3eefc878d274e 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -575,6 +575,8 @@ def sortByKey(self, ascending=True, numPartitions=None, keyfunc=lambda x: x):
         # noqa
 
         >>> tmp = [('a', 1), ('b', 2), ('1', 3), ('d', 4), ('2', 5)]
+        >>> sc.parallelize(tmp).sortByKey().first()
+        ('1', 3)
         >>> sc.parallelize(tmp).sortByKey(True, 1).collect()
         [('1', 3), ('2', 5), ('a', 1), ('b', 2), ('d', 4)]
         >>> sc.parallelize(tmp).sortByKey(True, 2).collect()
@@ -587,14 +589,13 @@ def sortByKey(self, ascending=True, numPartitions=None, keyfunc=lambda x: x):
         if numPartitions is None:
             numPartitions = self._defaultReducePartitions()
 
+        def sortPartition(iterator):
+            return iter(sorted(iterator, key=lambda (k, v): keyfunc(k), reverse=not ascending))
+
         if numPartitions == 1:
             if self.getNumPartitions() > 1:
                 self = self.coalesce(1)
-
-            def sort(iterator):
-                return sorted(iterator, reverse=(not ascending), key=lambda (k, v): keyfunc(k))
-
-            return self.mapPartitions(sort)
+            return self.mapPartitions(sortPartition)
 
         # first compute the boundary of each part via sampling: we want to partition
         # the key-space into bins such that the bins have roughly the same
@@ -610,17 +611,14 @@ def sort(iterator):
         bounds = [samples[len(samples) * (i + 1) / numPartitions]
                   for i in range(0, numPartitions - 1)]
 
-        def rangePartitionFunc(k):
+        def rangePartitioner(k):
             p = bisect.bisect_left(bounds, keyfunc(k))
             if ascending:
                 return p
             else:
                 return numPartitions - 1 - p
 
-        def mapFunc(iterator):
-            return sorted(iterator, reverse=(not ascending), key=lambda (k, v): keyfunc(k))
-
-        return self.partitionBy(numPartitions, rangePartitionFunc).mapPartitions(mapFunc, True)
+        return self.partitionBy(numPartitions, rangePartitioner).mapPartitions(sortPartition, True)
 
     def sortBy(self, keyfunc, ascending=True, numPartitions=None):
         """
@@ -1687,6 +1685,31 @@ def zip(self, other):
         >>> x.zip(y).collect()
         [(0, 1000), (1, 1001), (2, 1002), (3, 1003), (4, 1004)]
         """
+        if self.getNumPartitions() != other.getNumPartitions():
+            raise ValueError("Can only zip with RDD which has the same number of partitions")
+
+        def get_batch_size(ser):
+            if isinstance(ser, BatchedSerializer):
+                return ser.batchSize
+            return 0
+
+        def batch_as(rdd, batchSize):
+            ser = rdd._jrdd_deserializer
+            if isinstance(ser, BatchedSerializer):
+                ser = ser.serializer
+            return rdd._reserialize(BatchedSerializer(ser, batchSize))
+
+        my_batch = get_batch_size(self._jrdd_deserializer)
+        other_batch = get_batch_size(other._jrdd_deserializer)
+        if my_batch != other_batch:
+            # use the greatest batchSize to batch the other one.
+            if my_batch > other_batch:
+                other = batch_as(other, my_batch)
+            else:
+                self = batch_as(self, other_batch)
+
+        # There will be an Exception in JVM if there are different number
+        # of items in each partitions.
         pairRDD = self._jrdd.zip(other._jrdd)
         deserializer = PairDeserializer(self._jrdd_deserializer,
                                         other._jrdd_deserializer)
@@ -1812,7 +1835,7 @@ def _jrdd(self):
             self._jrdd_deserializer = NoOpSerializer()
         command = (self.func, self._prev_jrdd_deserializer,
                    self._jrdd_deserializer)
-        ser = CompressedSerializer(CloudPickleSerializer())
+        ser = CloudPickleSerializer()
         pickled_command = ser.dumps(command)
         broadcast_vars = ListConverter().convert(
             [x._jbroadcast for x in self.ctx._pickled_broadcast_vars],
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index 74870c0edcf99..fc49aa42dbaf9 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -255,6 +255,9 @@ def __init__(self, key_ser, val_ser):
 
     def load_stream(self, stream):
         for (keys, vals) in self.prepare_keys_values(stream):
+            if len(keys) != len(vals):
+                raise ValueError("Can not deserialize RDD with different number of items"
+                                 " in pair: (%d, %d)" % (len(keys), len(vals)))
             for pair in izip(keys, vals):
                 yield pair
 
diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py
index 2c68cd4921deb..1ebe7df418327 100644
--- a/python/pyspark/shuffle.py
+++ b/python/pyspark/shuffle.py
@@ -214,7 +214,7 @@ def __init__(self, aggregator, memory_limit=512, serializer=None,
 
     def _get_dirs(self):
         """ Get all the directories """
-        path = os.environ.get("SPARK_LOCAL_DIR", "/tmp")
+        path = os.environ.get("SPARK_LOCAL_DIRS", "/tmp")
         dirs = path.split(",")
         return [os.path.join(d, "python", str(os.getpid()), str(id(self)))
                 for d in dirs]
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 69d543d9d045d..51bfbb47e53c2 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -39,7 +39,7 @@
 
 from pyspark.context import SparkContext
 from pyspark.files import SparkFiles
-from pyspark.serializers import read_int
+from pyspark.serializers import read_int, BatchedSerializer, MarshalSerializer, PickleSerializer
 from pyspark.shuffle import Aggregator, InMemoryMerger, ExternalMerger
 
 _have_scipy = False
@@ -339,6 +339,31 @@ def test_large_broadcast(self):
         m = self.sc.parallelize(range(1), 1).map(lambda x: len(bdata.value)).sum()
         self.assertEquals(N, m)
 
+    def test_zip_with_different_serializers(self):
+        a = self.sc.parallelize(range(5))
+        b = self.sc.parallelize(range(100, 105))
+        self.assertEqual(a.zip(b).collect(), [(0, 100), (1, 101), (2, 102), (3, 103), (4, 104)])
+        a = a._reserialize(BatchedSerializer(PickleSerializer(), 2))
+        b = b._reserialize(MarshalSerializer())
+        self.assertEqual(a.zip(b).collect(), [(0, 100), (1, 101), (2, 102), (3, 103), (4, 104)])
+
+    def test_zip_with_different_number_of_items(self):
+        a = self.sc.parallelize(range(5), 2)
+        # different number of partitions
+        b = self.sc.parallelize(range(100, 106), 3)
+        self.assertRaises(ValueError, lambda: a.zip(b))
+        # different number of batched items in JVM
+        b = self.sc.parallelize(range(100, 104), 2)
+        self.assertRaises(Exception, lambda: a.zip(b).count())
+        # different number of items in one pair
+        b = self.sc.parallelize(range(100, 106), 2)
+        self.assertRaises(Exception, lambda: a.zip(b).count())
+        # same total number of items, but different distributions
+        a = self.sc.parallelize([2, 3], 2).flatMap(range)
+        b = self.sc.parallelize([3, 2], 2).flatMap(range)
+        self.assertEquals(a.count(), b.count())
+        self.assertRaises(Exception, lambda: a.zip(b).count())
+
 
 class TestIO(PySparkTestCase):
 
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 77a9c4a0e0677..6805063e06798 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -72,7 +72,7 @@ def main(infile, outfile):
             value = ser._read_with_length(infile)
             _broadcastRegistry[bid] = Broadcast(bid, value)
 
-        command = ser._read_with_length(infile)
+        command = pickleSer._read_with_length(infile)
         (func, deserializer, serializer) = command
         init_time = time.time()
         iterator = deserializer.load_stream(infile)
diff --git a/python/run-tests b/python/run-tests
index b506559a5e810..7b1ee3e1cddba 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -59,9 +59,7 @@ $PYSPARK_PYTHON --version
 run_test "pyspark/rdd.py"
 run_test "pyspark/context.py"
 run_test "pyspark/conf.py"
-if [ -n "$_RUN_SQL_TESTS" ]; then
-  run_test "pyspark/sql.py"
-fi
+run_test "pyspark/sql.py"
 # These tests are included in the module-level docs, and so must
 # be handled on a higher level rather than within the python file.
 export PYSPARK_DOC_TEST=1
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
index 4f2adb006fbc7..5cc41a83cc792 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
@@ -54,6 +54,7 @@ private[spark] object SQLConf {
 trait SQLConf {
   import SQLConf._
 
+  /** Only low degree of contention is expected for conf, thus NOT using ConcurrentHashMap. */
   @transient protected[spark] val settings = java.util.Collections.synchronizedMap(
     new java.util.HashMap[String, String]())
 
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 2bf8cfdcacd22..70bea1ed80fda 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -32,7 +32,7 @@ class CliSuite extends FunSuite with BeforeAndAfterAll with TestUtils {
     val commands =
       s"""../../bin/spark-sql
          |  --master local
-         |  --hiveconf ${ConfVars.METASTORECONNECTURLKEY}="$jdbcUrl"
+         |  --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$jdbcUrl
          |  --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$WAREHOUSE_PATH
        """.stripMargin.split("\\s+")
 
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
index aedef6ce1f5f2..326b0a7275b34 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
@@ -51,9 +51,6 @@ class HiveThriftServer2Suite extends FunSuite with BeforeAndAfterAll with TestUt
     port
   }
 
-  // If verbose is true, the test program will print all outputs coming from the Hive Thrift server.
-  val VERBOSE = Option(System.getenv("SPARK_SQL_TEST_VERBOSE")).getOrElse("false").toBoolean
-
   Class.forName(DRIVER_NAME)
 
   override def beforeAll() { launchServer() }
@@ -68,8 +65,7 @@ class HiveThriftServer2Suite extends FunSuite with BeforeAndAfterAll with TestUt
     val command =
       s"""../../sbin/start-thriftserver.sh
          |  --master local
-         |  --hiveconf hive.root.logger=INFO,console
-         |  --hiveconf ${ConfVars.METASTORECONNECTURLKEY}="$jdbcUrl"
+         |  --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$jdbcUrl
          |  --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$METASTORE_PATH
          |  --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=$HOST
          |  --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_PORT}=$PORT
@@ -77,12 +73,10 @@ class HiveThriftServer2Suite extends FunSuite with BeforeAndAfterAll with TestUt
 
     val pb = new ProcessBuilder(command ++ args: _*)
     val environment = pb.environment()
-    environment.put("HIVE_SERVER2_THRIFT_PORT", PORT.toString)
-    environment.put("HIVE_SERVER2_THRIFT_BIND_HOST", HOST)
     process = pb.start()
     inputReader = new BufferedReader(new InputStreamReader(process.getInputStream))
     errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream))
-    waitForOutput(inputReader, "ThriftBinaryCLIService listening on")
+    waitForOutput(inputReader, "ThriftBinaryCLIService listening on", 300000)
 
     // Spawn a thread to read the output from the forked process.
     // Note that this is necessary since in some configurations, log4j could be blocked
@@ -91,12 +85,8 @@ class HiveThriftServer2Suite extends FunSuite with BeforeAndAfterAll with TestUt
       while (true) {
         val stdout = readFrom(inputReader)
         val stderr = readFrom(errorReader)
-        if (VERBOSE && stdout.length > 0) {
-          println(stdout)
-        }
-        if (VERBOSE && stderr.length > 0) {
-          println(stderr)
-        }
+        print(stdout)
+        print(stderr)
         Thread.sleep(50)
       }
     }
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
index cc178fba12c9d..759baacaa4308 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
@@ -17,18 +17,18 @@
 
 package org.apache.spark.streaming
 
-import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream}
-import org.apache.spark.streaming.util.ManualClock
+import java.io.{ObjectInputStream, IOException}
 
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.SynchronizedBuffer
 import scala.reflect.ClassTag
 
-import java.io.{ObjectInputStream, IOException}
-
 import org.scalatest.{BeforeAndAfter, FunSuite}
+import com.google.common.io.Files
 
-import org.apache.spark.{SparkContext, SparkConf, Logging}
+import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream}
+import org.apache.spark.streaming.util.ManualClock
+import org.apache.spark.{SparkConf, Logging}
 import org.apache.spark.rdd.RDD
 
 /**
@@ -119,7 +119,12 @@ trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging {
   def batchDuration = Seconds(1)
 
   // Directory where the checkpoint data will be saved
-  def checkpointDir = "checkpoint"
+  lazy val checkpointDir = {
+    val dir = Files.createTempDir()
+    logDebug(s"checkpointDir: $dir")
+    dir.deleteOnExit()
+    dir.toString
+  }
 
   // Number of partitions of the input parallel collections created for testing
   def numInputPartitions = 2
@@ -242,7 +247,9 @@ trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging {
     logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput)
 
     // Get the output buffer
-    val outputStream = ssc.graph.getOutputStreams.head.asInstanceOf[TestOutputStreamWithPartitions[V]]
+    val outputStream = ssc.graph.getOutputStreams.
+      filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]).
+      head.asInstanceOf[TestOutputStreamWithPartitions[V]]
     val output = outputStream.output
 
     try {
diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 62b5c3bc5f0f3..4d4848b1bd8f8 100644
--- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -72,10 +72,6 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
   private var registered = false
 
   def run() {
-    // Setup the directories so things go to yarn approved directories rather
-    // then user specified and /tmp.
-    System.setProperty("spark.local.dir", getLocalDirs())
-
     // set the web ui port to be ephemeral for yarn so we don't conflict with
     // other spark processes running on the same box
     System.setProperty("spark.ui.port", "0")
@@ -138,20 +134,6 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
       params)
   }
 
-  /** Get the Yarn approved local directories. */
-  private def getLocalDirs(): String = {
-    // Hadoop 0.23 and 2.x have different Environment variable names for the
-    // local dirs, so lets check both. We assume one of the 2 is set.
-    // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X
-    val localDirs = Option(System.getenv("YARN_LOCAL_DIRS"))
-      .orElse(Option(System.getenv("LOCAL_DIRS")))
-
-    localDirs match {
-      case None => throw new Exception("Yarn Local dirs can't be empty")
-      case Some(l) => l
-    }
-  }
-
   private def getApplicationAttemptId(): ApplicationAttemptId = {
     val envs = System.getenv()
     val containerIdString = envs.get(ApplicationConstants.AM_CONTAINER_ID_ENV)
@@ -267,12 +249,10 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
       // TODO: This is a bit ugly. Can we make it nicer?
       // TODO: Handle container failure
 
-      // Exists the loop if the user thread exits.
-      while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive) {
-        if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
-          finishApplicationMaster(FinalApplicationStatus.FAILED,
-            "max number of executor failures reached")
-        }
+      // Exits the loop if the user thread exits.
+      while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive
+          && !isFinished) {
+        checkNumExecutorsFailed()
         yarnAllocator.allocateContainers(
           math.max(args.numExecutors - yarnAllocator.getNumExecutorsRunning, 0))
         Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL)
@@ -303,11 +283,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
 
     val t = new Thread {
       override def run() {
-        while (userThread.isAlive) {
-          if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
-            finishApplicationMaster(FinalApplicationStatus.FAILED,
-              "max number of executor failures reached")
-          }
+        while (userThread.isAlive && !isFinished) {
+          checkNumExecutorsFailed()
           val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning
           if (missingExecutorCount > 0) {
             logInfo("Allocating %d containers to make up for (potentially) lost containers".
@@ -327,6 +304,22 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
     t
   }
 
+  private def checkNumExecutorsFailed() {
+    if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
+      logInfo("max number of executor failures reached")
+      finishApplicationMaster(FinalApplicationStatus.FAILED,
+        "max number of executor failures reached")
+      // make sure to stop the user thread
+      val sparkContext = ApplicationMaster.sparkContextRef.get()
+      if (sparkContext != null) {
+        logInfo("Invoking sc stop from checkNumExecutorsFailed")
+        sparkContext.stop()
+      } else {
+        logError("sparkContext is null when should shutdown")
+      }
+    }
+  }
+
   private def sendProgress() {
     logDebug("Sending progress")
     // Simulated with an allocate request with no nodes requested ...
diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
index 184e2ad6c82cd..c3310fbc24a98 100644
--- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
+++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
@@ -95,11 +95,6 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
   }
 
   def run() {
-
-    // Setup the directories so things go to yarn approved directories rather
-    // then user specified and /tmp.
-    System.setProperty("spark.local.dir", getLocalDirs())
-
     appAttemptId = getApplicationAttemptId()
     resourceManager = registerWithResourceManager()
 
@@ -152,20 +147,6 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
     System.exit(0)
   }
 
-  /** Get the Yarn approved local directories. */
-  private def getLocalDirs(): String = {
-    // Hadoop 0.23 and 2.x have different Environment variable names for the
-    // local dirs, so lets check both. We assume one of the 2 is set.
-    // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X
-    val localDirs = Option(System.getenv("YARN_LOCAL_DIRS"))
-      .orElse(Option(System.getenv("LOCAL_DIRS")))
-
-    localDirs match {
-      case None => throw new Exception("Yarn Local dirs can't be empty")
-      case Some(l) => l
-    }
-  }
-
   private def getApplicationAttemptId(): ApplicationAttemptId = {
     val envs = System.getenv()
     val containerIdString = envs.get(ApplicationConstants.AM_CONTAINER_ID_ENV)
@@ -249,7 +230,8 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
     // Wait until all containers have finished
     // TODO: This is a bit ugly. Can we make it nicer?
     // TODO: Handle container failure
-    while ((yarnAllocator.getNumExecutorsRunning < args.numExecutors) && (!driverClosed)) {
+    while ((yarnAllocator.getNumExecutorsRunning < args.numExecutors) && (!driverClosed) &&
+        !isFinished) {
       yarnAllocator.allocateContainers(
         math.max(args.numExecutors - yarnAllocator.getNumExecutorsRunning, 0))
       checkNumExecutorsFailed()
@@ -271,7 +253,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
 
     val t = new Thread {
       override def run() {
-        while (!driverClosed) {
+        while (!driverClosed && !isFinished) {
           checkNumExecutorsFailed()
           val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning
           if (missingExecutorCount > 0) {
diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 035356d390c80..1c4005fd8e78e 100644
--- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -72,10 +72,6 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
   private var registered = false
 
   def run() {
-    // Setup the directories so things go to YARN approved directories rather
-    // than user specified and /tmp.
-    System.setProperty("spark.local.dir", getLocalDirs())
-
     // Set the web ui port to be ephemeral for yarn so we don't conflict with
     // other spark processes running on the same box
     System.setProperty("spark.ui.port", "0")
@@ -144,20 +140,6 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
       "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.params", params)
   }
 
-  // Get the Yarn approved local directories.
-  private def getLocalDirs(): String = {
-    // Hadoop 0.23 and 2.x have different Environment variable names for the
-    // local dirs, so lets check both. We assume one of the 2 is set.
-    // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X
-    val localDirs = Option(System.getenv("YARN_LOCAL_DIRS"))
-      .orElse(Option(System.getenv("LOCAL_DIRS")))
- 
-    localDirs match {
-      case None => throw new Exception("Yarn local dirs can't be empty")
-      case Some(l) => l
-    }
-  }
-
   private def registerApplicationMaster(): RegisterApplicationMasterResponse = {
     logInfo("Registering the ApplicationMaster")
     amClient.registerApplicationMaster(Utils.localHostName(), 0, uiAddress)
@@ -247,13 +229,12 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
       yarnAllocator.allocateResources()
       // Exits the loop if the user thread exits.
 
-      var iters = 0
-      while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive) {
+      while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive
+          && !isFinished) {
         checkNumExecutorsFailed()
         allocateMissingExecutor()
         yarnAllocator.allocateResources()
         Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL)
-        iters += 1
       }
     }
     logInfo("All executors have launched.")
@@ -271,8 +252,17 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
 
   private def checkNumExecutorsFailed() {
     if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
+      logInfo("max number of executor failures reached")
       finishApplicationMaster(FinalApplicationStatus.FAILED,
         "max number of executor failures reached")
+      // make sure to stop the user thread
+      val sparkContext = ApplicationMaster.sparkContextRef.get()
+      if (sparkContext != null) {
+        logInfo("Invoking sc stop from checkNumExecutorsFailed")
+        sparkContext.stop()
+      } else {
+        logError("sparkContext is null when should shutdown")
+      }
     }
   }
 
@@ -289,7 +279,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
 
     val t = new Thread {
       override def run() {
-        while (userThread.isAlive) {
+        while (userThread.isAlive && !isFinished) {
           checkNumExecutorsFailed()
           allocateMissingExecutor()
           logDebug("Sending progress")
diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
index fc7b8320d734d..45925f1fea005 100644
--- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
+++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
@@ -94,11 +94,6 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
   }
 
   def run() {
-
-    // Setup the directories so things go to yarn approved directories rather
-    // then user specified and /tmp.
-    System.setProperty("spark.local.dir", getLocalDirs())
-
     amClient = AMRMClient.createAMRMClient()
     amClient.init(yarnConf)
     amClient.start()
@@ -141,20 +136,6 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
     System.exit(0)
   }
 
-  /** Get the Yarn approved local directories. */
-  private def getLocalDirs(): String = {
-    // Hadoop 0.23 and 2.x have different Environment variable names for the
-    // local dirs, so lets check both. We assume one of the 2 is set.
-    // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X
-    val localDirs = Option(System.getenv("YARN_LOCAL_DIRS"))
-      .orElse(Option(System.getenv("LOCAL_DIRS")))
-
-    localDirs match {
-      case None => throw new Exception("Yarn Local dirs can't be empty")
-      case Some(l) => l
-    }
-  }
-
   private def registerApplicationMaster(): RegisterApplicationMasterResponse = {
     val appUIAddress = sparkConf.get("spark.driver.appUIAddress", "")
     logInfo(s"Registering the ApplicationMaster with appUIAddress: $appUIAddress")
@@ -217,7 +198,8 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
     // Wait until all containers have launched
     yarnAllocator.addResourceRequests(args.numExecutors)
     yarnAllocator.allocateResources()
-    while ((yarnAllocator.getNumExecutorsRunning < args.numExecutors) && (!driverClosed)) {
+    while ((yarnAllocator.getNumExecutorsRunning < args.numExecutors) && (!driverClosed) &&
+        !isFinished) {
       checkNumExecutorsFailed()
       allocateMissingExecutor()
       yarnAllocator.allocateResources()
@@ -249,7 +231,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
 
     val t = new Thread {
       override def run() {
-        while (!driverClosed) {
+        while (!driverClosed && !isFinished) {
           checkNumExecutorsFailed()
           allocateMissingExecutor()
           logDebug("Sending progress")