From bb3bbfd25a33dcad969c18df7dae0b62f01aefb4 Mon Sep 17 00:00:00 2001
From: Prashant Sharma
Date: Tue, 18 Nov 2014 19:41:55 +0530
Subject: [PATCH 1/3] SPARK-3962 Marked scope as provided for external.

---
 external/flume/pom.xml | 8 +-
 .../streaming/LocalJavaStreamingContext.java | 40 ++
 .../spark/streaming/TestSuiteBase.scala | 413 ++++++++++++++++++
 external/kafka/pom.xml | 8 +-
 .../spark/streaming/TestSuiteBase.scala | 413 ++++++++++++++++++
 external/mqtt/pom.xml | 8 +-
 .../streaming/LocalJavaStreamingContext.java | 40 ++
 .../spark/streaming/TestSuiteBase.scala | 413 ++++++++++++++++++
 external/twitter/pom.xml | 8 +-
 .../streaming/LocalJavaStreamingContext.java | 40 ++
 .../spark/streaming/TestSuiteBase.scala | 413 ++++++++++++++++++
 external/zeromq/pom.xml | 8 +-
 .../streaming/LocalJavaStreamingContext.java | 40 ++
 .../spark/streaming/TestSuiteBase.scala | 413 ++++++++++++++++++
 14 files changed, 2230 insertions(+), 35 deletions(-)
 create mode 100644 external/flume/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java
 create mode 100644 external/flume/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
 create mode 100644 external/kafka/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
 create mode 100644 external/mqtt/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java
 create mode 100644 external/mqtt/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
 create mode 100644 external/twitter/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java
 create mode 100644 external/twitter/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
 create mode 100644 external/zeromq/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java
 create mode 100644 external/zeromq/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala

diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 7d31e32283d88..c60205dc4141c 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -39,19 +39,13 @@
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
+      <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming-flume-sink_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.apache.flume</groupId>
       <artifactId>flume-ng-sdk</artifactId>
diff --git a/external/flume/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java b/external/flume/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java
new file mode 100644
index 0000000000000..6e1f01900071b
--- /dev/null
+++ b/external/flume/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.streaming; + +import org.apache.spark.streaming.api.java.JavaStreamingContext; +import org.junit.After; +import org.junit.Before; + +public abstract class LocalJavaStreamingContext { + + protected transient JavaStreamingContext ssc; + + @Before + public void setUp() { + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock"); + ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000)); + ssc.checkpoint("checkpoint"); + } + + @After + public void tearDown() { + ssc.stop(); + ssc = null; + } +} diff --git a/external/flume/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/flume/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala new file mode 100644 index 0000000000000..76b3b73a2ff3b --- /dev/null +++ b/external/flume/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -0,0 +1,413 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import java.io.{ObjectInputStream, IOException} + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.SynchronizedBuffer +import scala.reflect.ClassTag + +import org.scalatest.{BeforeAndAfter, FunSuite} + +import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} +import org.apache.spark.streaming.util.ManualClock +import org.apache.spark.{SparkConf, Logging} +import org.apache.spark.rdd.RDD +import org.apache.spark.util.Utils + +/** + * This is a input stream just for the testsuites. This is equivalent to a checkpointable, + * replayable, reliable message queue like Kafka. It requires a sequence as input, and + * returns the i_th element at the i_th batch unde manual clock. + */ +class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) + extends InputDStream[T](ssc_) { + + def start() {} + + def stop() {} + + def compute(validTime: Time): Option[RDD[T]] = { + logInfo("Computing RDD for time " + validTime) + val index = ((validTime - zeroTime) / slideDuration - 1).toInt + val selectedInput = if (index < input.size) input(index) else Seq[T]() + + // lets us test cases where RDDs are not created + if (selectedInput == null) + return None + + val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) + logInfo("Created RDD " + rdd.id + " with " + selectedInput) + Some(rdd) + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. 
+ * + * The buffer contains a sequence of RDD's, each containing a sequence of items + */ +class TestOutputStream[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.collect() + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. + * + * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each + * containing a sequence of items. + */ +class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.glom().collect().map(_.toSeq) + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } + + def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) +} + +/** + * This is the base trait for Spark Streaming testsuites. This provides basic functionality + * to run user-defined set of input on user-defined stream operations, and verify the output. + */ +trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { + + // Name of the framework for Spark context + def framework = this.getClass.getSimpleName + + // Master for Spark context + def master = "local[2]" + + // Batch duration + def batchDuration = Seconds(1) + + // Directory where the checkpoint data will be saved + lazy val checkpointDir = { + val dir = Utils.createTempDir() + logDebug(s"checkpointDir: $dir") + dir.toString + } + + // Number of partitions of the input parallel collections created for testing + def numInputPartitions = 2 + + // Maximum time to wait before the test times out + def maxWaitTimeMillis = 10000 + + // Whether to use manual clock or not + def useManualClock = true + + // Whether to actually wait in real time before changing manual clock + def actuallyWait = false + + //// A SparkConf to use in tests. Can be modified before calling setupStreams to configure things. + val conf = new SparkConf() + .setMaster(master) + .setAppName(framework) + + // Default before function for any streaming test suite. Override this + // if you want to add your stuff to "before" (i.e., don't call before { } ) + def beforeFunction() { + if (useManualClock) { + logInfo("Using manual clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") + } else { + logInfo("Using real clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") + } + } + + // Default after function for any streaming test suite. 
Override this + // if you want to add your stuff to "after" (i.e., don't call after { } ) + def afterFunction() { + System.clearProperty("spark.streaming.clock") + } + + before(beforeFunction) + after(afterFunction) + + /** + * Run a block of code with the given StreamingContext and automatically + * stop the context when the block completes or when an exception is thrown. + */ + def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { + try { + block(ssc) + } finally { + try { + ssc.stop(stopSparkContext = true) + } catch { + case e: Exception => + logError("Error stopping StreamingContext", e) + } + } + } + + /** + * Set up required DStreams to test the DStream operation using the two sequences + * of input collections. + */ + def setupStreams[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + numPartitions: Int = numInputPartitions + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream = new TestInputStream(ssc, input, numPartitions) + val operatedStream = operation(inputStream) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) + outputStream.register() + ssc + } + + /** + * Set up required DStreams to test the binary operation using the sequence + * of input collections. + */ + def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W] + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) + val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) + val operatedStream = operation(inputStream1, inputStream2) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) + outputStream.register() + ssc + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of items for each RDD. + */ + def runStreams[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[V]] = { + // Flatten each RDD into a single Seq + runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each + * representing one partition. 
+ */ + def runStreamsWithPartitions[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[Seq[V]]] = { + assert(numBatches > 0, "Number of batches to run stream computation is zero") + assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") + logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) + + // Get the output buffer + val outputStream = ssc.graph.getOutputStreams. + filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). + head.asInstanceOf[TestOutputStreamWithPartitions[V]] + val output = outputStream.output + + try { + // Start computation + ssc.start() + + // Advance manual clock + val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + logInfo("Manual clock before advancing = " + clock.time) + if (actuallyWait) { + for (i <- 1 to numBatches) { + logInfo("Actually waiting for " + batchDuration) + clock.addToTime(batchDuration.milliseconds) + Thread.sleep(batchDuration.milliseconds) + } + } else { + clock.addToTime(numBatches * batchDuration.milliseconds) + } + logInfo("Manual clock after advancing = " + clock.time) + + // Wait until expected number of output items have been generated + val startTime = System.currentTimeMillis() + while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { + logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) + ssc.awaitTermination(50) + } + val timeTaken = System.currentTimeMillis() - startTime + logInfo("Output generated in " + timeTaken + " milliseconds") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") + assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") + + Thread.sleep(100) // Give some time for the forgetting old RDDs to complete + } finally { + ssc.stop(stopSparkContext = true) + } + output + } + + /** + * Verify whether the output values after running a DStream operation + * is same as the expected output values, by comparing the output + * collections either as lists (order matters) or sets (order does not matter) + */ + def verifyOutput[V: ClassTag]( + output: Seq[Seq[V]], + expectedOutput: Seq[Seq[V]], + useSet: Boolean + ) { + logInfo("--------------------------------") + logInfo("output.size = " + output.size) + logInfo("output") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("expected output.size = " + expectedOutput.size) + logInfo("expected output") + expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("--------------------------------") + + // Match the output with the expected output + assert(output.size === expectedOutput.size, "Number of outputs do not match") + for (i <- 0 until output.size) { + if (useSet) { + assert(output(i).toSet === expectedOutput(i).toSet) + } else { + assert(output(i).toList === expectedOutput(i).toList) + } + } + logInfo("Output verified successfully") + } + + /** + * Test unary DStream operation with a list of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + useSet: Boolean = false + ) { + testOperation[U, V](input, operation, expectedOutput, -1, useSet) + } + + /** + * Test unary DStream operation with a list of inputs + * @param input Sequence of 
input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + numBatches: Int, + useSet: Boolean + ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V](input, operation)) { ssc => + val output = runStreams[V](ssc, numBatches_, expectedOutput.size) + verifyOutput[V](output, expectedOutput, useSet) + } + } + + /** + * Test binary DStream operation with two lists of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + useSet: Boolean + ) { + testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) + } + + /** + * Test binary DStream operation with two lists of inputs + * @param input1 First sequence of input collections + * @param input2 Second sequence of input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + numBatches: Int, + useSet: Boolean + ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => + val output = runStreams[W](ssc, numBatches_, expectedOutput.size) + verifyOutput[W](output, expectedOutput, useSet) + } + } +} diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index 2067c473f0e3f..5761ba5e4a971 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -39,13 +39,7 @@ org.apache.spark spark-streaming_${scala.binary.version} ${project.version} - - - org.apache.spark - spark-streaming_${scala.binary.version} - ${project.version} - test-jar - test + provided org.apache.kafka diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/kafka/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala new file mode 100644 index 0000000000000..76b3b73a2ff3b --- /dev/null +++ b/external/kafka/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -0,0 +1,413 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import java.io.{ObjectInputStream, IOException} + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.SynchronizedBuffer +import scala.reflect.ClassTag + +import org.scalatest.{BeforeAndAfter, FunSuite} + +import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} +import org.apache.spark.streaming.util.ManualClock +import org.apache.spark.{SparkConf, Logging} +import org.apache.spark.rdd.RDD +import org.apache.spark.util.Utils + +/** + * This is a input stream just for the testsuites. This is equivalent to a checkpointable, + * replayable, reliable message queue like Kafka. It requires a sequence as input, and + * returns the i_th element at the i_th batch unde manual clock. + */ +class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) + extends InputDStream[T](ssc_) { + + def start() {} + + def stop() {} + + def compute(validTime: Time): Option[RDD[T]] = { + logInfo("Computing RDD for time " + validTime) + val index = ((validTime - zeroTime) / slideDuration - 1).toInt + val selectedInput = if (index < input.size) input(index) else Seq[T]() + + // lets us test cases where RDDs are not created + if (selectedInput == null) + return None + + val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) + logInfo("Created RDD " + rdd.id + " with " + selectedInput) + Some(rdd) + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. + * + * The buffer contains a sequence of RDD's, each containing a sequence of items + */ +class TestOutputStream[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.collect() + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. + * + * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each + * containing a sequence of items. 
+ */ +class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.glom().collect().map(_.toSeq) + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } + + def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) +} + +/** + * This is the base trait for Spark Streaming testsuites. This provides basic functionality + * to run user-defined set of input on user-defined stream operations, and verify the output. + */ +trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { + + // Name of the framework for Spark context + def framework = this.getClass.getSimpleName + + // Master for Spark context + def master = "local[2]" + + // Batch duration + def batchDuration = Seconds(1) + + // Directory where the checkpoint data will be saved + lazy val checkpointDir = { + val dir = Utils.createTempDir() + logDebug(s"checkpointDir: $dir") + dir.toString + } + + // Number of partitions of the input parallel collections created for testing + def numInputPartitions = 2 + + // Maximum time to wait before the test times out + def maxWaitTimeMillis = 10000 + + // Whether to use manual clock or not + def useManualClock = true + + // Whether to actually wait in real time before changing manual clock + def actuallyWait = false + + //// A SparkConf to use in tests. Can be modified before calling setupStreams to configure things. + val conf = new SparkConf() + .setMaster(master) + .setAppName(framework) + + // Default before function for any streaming test suite. Override this + // if you want to add your stuff to "before" (i.e., don't call before { } ) + def beforeFunction() { + if (useManualClock) { + logInfo("Using manual clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") + } else { + logInfo("Using real clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") + } + } + + // Default after function for any streaming test suite. Override this + // if you want to add your stuff to "after" (i.e., don't call after { } ) + def afterFunction() { + System.clearProperty("spark.streaming.clock") + } + + before(beforeFunction) + after(afterFunction) + + /** + * Run a block of code with the given StreamingContext and automatically + * stop the context when the block completes or when an exception is thrown. + */ + def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { + try { + block(ssc) + } finally { + try { + ssc.stop(stopSparkContext = true) + } catch { + case e: Exception => + logError("Error stopping StreamingContext", e) + } + } + } + + /** + * Set up required DStreams to test the DStream operation using the two sequences + * of input collections. 
+ */ + def setupStreams[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + numPartitions: Int = numInputPartitions + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream = new TestInputStream(ssc, input, numPartitions) + val operatedStream = operation(inputStream) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) + outputStream.register() + ssc + } + + /** + * Set up required DStreams to test the binary operation using the sequence + * of input collections. + */ + def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W] + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) + val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) + val operatedStream = operation(inputStream1, inputStream2) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) + outputStream.register() + ssc + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of items for each RDD. + */ + def runStreams[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[V]] = { + // Flatten each RDD into a single Seq + runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each + * representing one partition. + */ + def runStreamsWithPartitions[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[Seq[V]]] = { + assert(numBatches > 0, "Number of batches to run stream computation is zero") + assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") + logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) + + // Get the output buffer + val outputStream = ssc.graph.getOutputStreams. + filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). 
+ head.asInstanceOf[TestOutputStreamWithPartitions[V]] + val output = outputStream.output + + try { + // Start computation + ssc.start() + + // Advance manual clock + val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + logInfo("Manual clock before advancing = " + clock.time) + if (actuallyWait) { + for (i <- 1 to numBatches) { + logInfo("Actually waiting for " + batchDuration) + clock.addToTime(batchDuration.milliseconds) + Thread.sleep(batchDuration.milliseconds) + } + } else { + clock.addToTime(numBatches * batchDuration.milliseconds) + } + logInfo("Manual clock after advancing = " + clock.time) + + // Wait until expected number of output items have been generated + val startTime = System.currentTimeMillis() + while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { + logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) + ssc.awaitTermination(50) + } + val timeTaken = System.currentTimeMillis() - startTime + logInfo("Output generated in " + timeTaken + " milliseconds") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") + assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") + + Thread.sleep(100) // Give some time for the forgetting old RDDs to complete + } finally { + ssc.stop(stopSparkContext = true) + } + output + } + + /** + * Verify whether the output values after running a DStream operation + * is same as the expected output values, by comparing the output + * collections either as lists (order matters) or sets (order does not matter) + */ + def verifyOutput[V: ClassTag]( + output: Seq[Seq[V]], + expectedOutput: Seq[Seq[V]], + useSet: Boolean + ) { + logInfo("--------------------------------") + logInfo("output.size = " + output.size) + logInfo("output") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("expected output.size = " + expectedOutput.size) + logInfo("expected output") + expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("--------------------------------") + + // Match the output with the expected output + assert(output.size === expectedOutput.size, "Number of outputs do not match") + for (i <- 0 until output.size) { + if (useSet) { + assert(output(i).toSet === expectedOutput(i).toSet) + } else { + assert(output(i).toList === expectedOutput(i).toList) + } + } + logInfo("Output verified successfully") + } + + /** + * Test unary DStream operation with a list of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + useSet: Boolean = false + ) { + testOperation[U, V](input, operation, expectedOutput, -1, useSet) + } + + /** + * Test unary DStream operation with a list of inputs + * @param input Sequence of input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + numBatches: Int, + useSet: Boolean 
+ ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V](input, operation)) { ssc => + val output = runStreams[V](ssc, numBatches_, expectedOutput.size) + verifyOutput[V](output, expectedOutput, useSet) + } + } + + /** + * Test binary DStream operation with two lists of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + useSet: Boolean + ) { + testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) + } + + /** + * Test binary DStream operation with two lists of inputs + * @param input1 First sequence of input collections + * @param input2 Second sequence of input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + numBatches: Int, + useSet: Boolean + ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => + val output = runStreams[W](ssc, numBatches_, expectedOutput.size) + verifyOutput[W](output, expectedOutput, useSet) + } + } +} diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index 362a76e515938..e1b816a43b0ec 100644 --- a/external/mqtt/pom.xml +++ b/external/mqtt/pom.xml @@ -39,13 +39,7 @@ org.apache.spark spark-streaming_${scala.binary.version} ${project.version} - - - org.apache.spark - spark-streaming_${scala.binary.version} - ${project.version} - test-jar - test + provided org.eclipse.paho diff --git a/external/mqtt/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java b/external/mqtt/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java new file mode 100644 index 0000000000000..6e1f01900071b --- /dev/null +++ b/external/mqtt/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.streaming; + +import org.apache.spark.streaming.api.java.JavaStreamingContext; +import org.junit.After; +import org.junit.Before; + +public abstract class LocalJavaStreamingContext { + + protected transient JavaStreamingContext ssc; + + @Before + public void setUp() { + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock"); + ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000)); + ssc.checkpoint("checkpoint"); + } + + @After + public void tearDown() { + ssc.stop(); + ssc = null; + } +} diff --git a/external/mqtt/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/mqtt/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala new file mode 100644 index 0000000000000..76b3b73a2ff3b --- /dev/null +++ b/external/mqtt/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -0,0 +1,413 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import java.io.{ObjectInputStream, IOException} + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.SynchronizedBuffer +import scala.reflect.ClassTag + +import org.scalatest.{BeforeAndAfter, FunSuite} + +import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} +import org.apache.spark.streaming.util.ManualClock +import org.apache.spark.{SparkConf, Logging} +import org.apache.spark.rdd.RDD +import org.apache.spark.util.Utils + +/** + * This is a input stream just for the testsuites. This is equivalent to a checkpointable, + * replayable, reliable message queue like Kafka. It requires a sequence as input, and + * returns the i_th element at the i_th batch unde manual clock. + */ +class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) + extends InputDStream[T](ssc_) { + + def start() {} + + def stop() {} + + def compute(validTime: Time): Option[RDD[T]] = { + logInfo("Computing RDD for time " + validTime) + val index = ((validTime - zeroTime) / slideDuration - 1).toInt + val selectedInput = if (index < input.size) input(index) else Seq[T]() + + // lets us test cases where RDDs are not created + if (selectedInput == null) + return None + + val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) + logInfo("Created RDD " + rdd.id + " with " + selectedInput) + Some(rdd) + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. 
+ * + * The buffer contains a sequence of RDD's, each containing a sequence of items + */ +class TestOutputStream[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.collect() + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. + * + * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each + * containing a sequence of items. + */ +class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.glom().collect().map(_.toSeq) + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } + + def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) +} + +/** + * This is the base trait for Spark Streaming testsuites. This provides basic functionality + * to run user-defined set of input on user-defined stream operations, and verify the output. + */ +trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { + + // Name of the framework for Spark context + def framework = this.getClass.getSimpleName + + // Master for Spark context + def master = "local[2]" + + // Batch duration + def batchDuration = Seconds(1) + + // Directory where the checkpoint data will be saved + lazy val checkpointDir = { + val dir = Utils.createTempDir() + logDebug(s"checkpointDir: $dir") + dir.toString + } + + // Number of partitions of the input parallel collections created for testing + def numInputPartitions = 2 + + // Maximum time to wait before the test times out + def maxWaitTimeMillis = 10000 + + // Whether to use manual clock or not + def useManualClock = true + + // Whether to actually wait in real time before changing manual clock + def actuallyWait = false + + //// A SparkConf to use in tests. Can be modified before calling setupStreams to configure things. + val conf = new SparkConf() + .setMaster(master) + .setAppName(framework) + + // Default before function for any streaming test suite. Override this + // if you want to add your stuff to "before" (i.e., don't call before { } ) + def beforeFunction() { + if (useManualClock) { + logInfo("Using manual clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") + } else { + logInfo("Using real clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") + } + } + + // Default after function for any streaming test suite. 
Override this + // if you want to add your stuff to "after" (i.e., don't call after { } ) + def afterFunction() { + System.clearProperty("spark.streaming.clock") + } + + before(beforeFunction) + after(afterFunction) + + /** + * Run a block of code with the given StreamingContext and automatically + * stop the context when the block completes or when an exception is thrown. + */ + def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { + try { + block(ssc) + } finally { + try { + ssc.stop(stopSparkContext = true) + } catch { + case e: Exception => + logError("Error stopping StreamingContext", e) + } + } + } + + /** + * Set up required DStreams to test the DStream operation using the two sequences + * of input collections. + */ + def setupStreams[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + numPartitions: Int = numInputPartitions + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream = new TestInputStream(ssc, input, numPartitions) + val operatedStream = operation(inputStream) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) + outputStream.register() + ssc + } + + /** + * Set up required DStreams to test the binary operation using the sequence + * of input collections. + */ + def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W] + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) + val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) + val operatedStream = operation(inputStream1, inputStream2) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) + outputStream.register() + ssc + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of items for each RDD. + */ + def runStreams[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[V]] = { + // Flatten each RDD into a single Seq + runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each + * representing one partition. 
+ */ + def runStreamsWithPartitions[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[Seq[V]]] = { + assert(numBatches > 0, "Number of batches to run stream computation is zero") + assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") + logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) + + // Get the output buffer + val outputStream = ssc.graph.getOutputStreams. + filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). + head.asInstanceOf[TestOutputStreamWithPartitions[V]] + val output = outputStream.output + + try { + // Start computation + ssc.start() + + // Advance manual clock + val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + logInfo("Manual clock before advancing = " + clock.time) + if (actuallyWait) { + for (i <- 1 to numBatches) { + logInfo("Actually waiting for " + batchDuration) + clock.addToTime(batchDuration.milliseconds) + Thread.sleep(batchDuration.milliseconds) + } + } else { + clock.addToTime(numBatches * batchDuration.milliseconds) + } + logInfo("Manual clock after advancing = " + clock.time) + + // Wait until expected number of output items have been generated + val startTime = System.currentTimeMillis() + while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { + logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) + ssc.awaitTermination(50) + } + val timeTaken = System.currentTimeMillis() - startTime + logInfo("Output generated in " + timeTaken + " milliseconds") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") + assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") + + Thread.sleep(100) // Give some time for the forgetting old RDDs to complete + } finally { + ssc.stop(stopSparkContext = true) + } + output + } + + /** + * Verify whether the output values after running a DStream operation + * is same as the expected output values, by comparing the output + * collections either as lists (order matters) or sets (order does not matter) + */ + def verifyOutput[V: ClassTag]( + output: Seq[Seq[V]], + expectedOutput: Seq[Seq[V]], + useSet: Boolean + ) { + logInfo("--------------------------------") + logInfo("output.size = " + output.size) + logInfo("output") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("expected output.size = " + expectedOutput.size) + logInfo("expected output") + expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("--------------------------------") + + // Match the output with the expected output + assert(output.size === expectedOutput.size, "Number of outputs do not match") + for (i <- 0 until output.size) { + if (useSet) { + assert(output(i).toSet === expectedOutput(i).toSet) + } else { + assert(output(i).toList === expectedOutput(i).toList) + } + } + logInfo("Output verified successfully") + } + + /** + * Test unary DStream operation with a list of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + useSet: Boolean = false + ) { + testOperation[U, V](input, operation, expectedOutput, -1, useSet) + } + + /** + * Test unary DStream operation with a list of inputs + * @param input Sequence of 
input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + numBatches: Int, + useSet: Boolean + ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V](input, operation)) { ssc => + val output = runStreams[V](ssc, numBatches_, expectedOutput.size) + verifyOutput[V](output, expectedOutput, useSet) + } + } + + /** + * Test binary DStream operation with two lists of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + useSet: Boolean + ) { + testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) + } + + /** + * Test binary DStream operation with two lists of inputs + * @param input1 First sequence of input collections + * @param input2 Second sequence of input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + numBatches: Int, + useSet: Boolean + ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => + val output = runStreams[W](ssc, numBatches_, expectedOutput.size) + verifyOutput[W](output, expectedOutput, useSet) + } + } +} diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml index 1d7dd49d15c22..81a53105af8b5 100644 --- a/external/twitter/pom.xml +++ b/external/twitter/pom.xml @@ -39,13 +39,7 @@ org.apache.spark spark-streaming_${scala.binary.version} ${project.version} - - - org.apache.spark - spark-streaming_${scala.binary.version} - ${project.version} - test-jar - test + provided org.twitter4j diff --git a/external/twitter/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java b/external/twitter/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java new file mode 100644 index 0000000000000..6e1f01900071b --- /dev/null +++ b/external/twitter/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming; + +import org.apache.spark.streaming.api.java.JavaStreamingContext; +import org.junit.After; +import org.junit.Before; + +public abstract class LocalJavaStreamingContext { + + protected transient JavaStreamingContext ssc; + + @Before + public void setUp() { + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock"); + ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000)); + ssc.checkpoint("checkpoint"); + } + + @After + public void tearDown() { + ssc.stop(); + ssc = null; + } +} diff --git a/external/twitter/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/twitter/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala new file mode 100644 index 0000000000000..76b3b73a2ff3b --- /dev/null +++ b/external/twitter/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -0,0 +1,413 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import java.io.{ObjectInputStream, IOException} + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.SynchronizedBuffer +import scala.reflect.ClassTag + +import org.scalatest.{BeforeAndAfter, FunSuite} + +import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} +import org.apache.spark.streaming.util.ManualClock +import org.apache.spark.{SparkConf, Logging} +import org.apache.spark.rdd.RDD +import org.apache.spark.util.Utils + +/** + * This is a input stream just for the testsuites. This is equivalent to a checkpointable, + * replayable, reliable message queue like Kafka. It requires a sequence as input, and + * returns the i_th element at the i_th batch unde manual clock. 
+ */ +class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) + extends InputDStream[T](ssc_) { + + def start() {} + + def stop() {} + + def compute(validTime: Time): Option[RDD[T]] = { + logInfo("Computing RDD for time " + validTime) + val index = ((validTime - zeroTime) / slideDuration - 1).toInt + val selectedInput = if (index < input.size) input(index) else Seq[T]() + + // lets us test cases where RDDs are not created + if (selectedInput == null) + return None + + val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) + logInfo("Created RDD " + rdd.id + " with " + selectedInput) + Some(rdd) + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. + * + * The buffer contains a sequence of RDD's, each containing a sequence of items + */ +class TestOutputStream[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.collect() + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. + * + * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each + * containing a sequence of items. + */ +class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.glom().collect().map(_.toSeq) + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } + + def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) +} + +/** + * This is the base trait for Spark Streaming testsuites. This provides basic functionality + * to run user-defined set of input on user-defined stream operations, and verify the output. + */ +trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { + + // Name of the framework for Spark context + def framework = this.getClass.getSimpleName + + // Master for Spark context + def master = "local[2]" + + // Batch duration + def batchDuration = Seconds(1) + + // Directory where the checkpoint data will be saved + lazy val checkpointDir = { + val dir = Utils.createTempDir() + logDebug(s"checkpointDir: $dir") + dir.toString + } + + // Number of partitions of the input parallel collections created for testing + def numInputPartitions = 2 + + // Maximum time to wait before the test times out + def maxWaitTimeMillis = 10000 + + // Whether to use manual clock or not + def useManualClock = true + + // Whether to actually wait in real time before changing manual clock + def actuallyWait = false + + //// A SparkConf to use in tests. Can be modified before calling setupStreams to configure things. 
+ val conf = new SparkConf() + .setMaster(master) + .setAppName(framework) + + // Default before function for any streaming test suite. Override this + // if you want to add your stuff to "before" (i.e., don't call before { } ) + def beforeFunction() { + if (useManualClock) { + logInfo("Using manual clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") + } else { + logInfo("Using real clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") + } + } + + // Default after function for any streaming test suite. Override this + // if you want to add your stuff to "after" (i.e., don't call after { } ) + def afterFunction() { + System.clearProperty("spark.streaming.clock") + } + + before(beforeFunction) + after(afterFunction) + + /** + * Run a block of code with the given StreamingContext and automatically + * stop the context when the block completes or when an exception is thrown. + */ + def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { + try { + block(ssc) + } finally { + try { + ssc.stop(stopSparkContext = true) + } catch { + case e: Exception => + logError("Error stopping StreamingContext", e) + } + } + } + + /** + * Set up required DStreams to test the DStream operation using the two sequences + * of input collections. + */ + def setupStreams[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + numPartitions: Int = numInputPartitions + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream = new TestInputStream(ssc, input, numPartitions) + val operatedStream = operation(inputStream) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) + outputStream.register() + ssc + } + + /** + * Set up required DStreams to test the binary operation using the sequence + * of input collections. + */ + def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W] + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) + val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) + val operatedStream = operation(inputStream1, inputStream2) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) + outputStream.register() + ssc + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of items for each RDD. 
+ */ + def runStreams[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[V]] = { + // Flatten each RDD into a single Seq + runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each + * representing one partition. + */ + def runStreamsWithPartitions[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[Seq[V]]] = { + assert(numBatches > 0, "Number of batches to run stream computation is zero") + assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") + logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) + + // Get the output buffer + val outputStream = ssc.graph.getOutputStreams. + filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). + head.asInstanceOf[TestOutputStreamWithPartitions[V]] + val output = outputStream.output + + try { + // Start computation + ssc.start() + + // Advance manual clock + val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + logInfo("Manual clock before advancing = " + clock.time) + if (actuallyWait) { + for (i <- 1 to numBatches) { + logInfo("Actually waiting for " + batchDuration) + clock.addToTime(batchDuration.milliseconds) + Thread.sleep(batchDuration.milliseconds) + } + } else { + clock.addToTime(numBatches * batchDuration.milliseconds) + } + logInfo("Manual clock after advancing = " + clock.time) + + // Wait until expected number of output items have been generated + val startTime = System.currentTimeMillis() + while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { + logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) + ssc.awaitTermination(50) + } + val timeTaken = System.currentTimeMillis() - startTime + logInfo("Output generated in " + timeTaken + " milliseconds") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") + assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") + + Thread.sleep(100) // Give some time for the forgetting old RDDs to complete + } finally { + ssc.stop(stopSparkContext = true) + } + output + } + + /** + * Verify whether the output values after running a DStream operation + * is same as the expected output values, by comparing the output + * collections either as lists (order matters) or sets (order does not matter) + */ + def verifyOutput[V: ClassTag]( + output: Seq[Seq[V]], + expectedOutput: Seq[Seq[V]], + useSet: Boolean + ) { + logInfo("--------------------------------") + logInfo("output.size = " + output.size) + logInfo("output") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("expected output.size = " + expectedOutput.size) + logInfo("expected output") + expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("--------------------------------") + + // Match the output with the expected output + assert(output.size === expectedOutput.size, "Number of outputs do not match") + for (i <- 0 until output.size) { + if (useSet) { + 
assert(output(i).toSet === expectedOutput(i).toSet) + } else { + assert(output(i).toList === expectedOutput(i).toList) + } + } + logInfo("Output verified successfully") + } + + /** + * Test unary DStream operation with a list of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + useSet: Boolean = false + ) { + testOperation[U, V](input, operation, expectedOutput, -1, useSet) + } + + /** + * Test unary DStream operation with a list of inputs + * @param input Sequence of input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + numBatches: Int, + useSet: Boolean + ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V](input, operation)) { ssc => + val output = runStreams[V](ssc, numBatches_, expectedOutput.size) + verifyOutput[V](output, expectedOutput, useSet) + } + } + + /** + * Test binary DStream operation with two lists of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + useSet: Boolean + ) { + testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) + } + + /** + * Test binary DStream operation with two lists of inputs + * @param input1 First sequence of input collections + * @param input2 Second sequence of input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + numBatches: Int, + useSet: Boolean + ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => + val output = runStreams[W](ssc, numBatches_, expectedOutput.size) + verifyOutput[W](output, expectedOutput, useSet) + } + } +} diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml index 7e48968feb3bc..6d75179e9404d 100644 --- a/external/zeromq/pom.xml +++ b/external/zeromq/pom.xml @@ -39,13 +39,7 @@ org.apache.spark spark-streaming_${scala.binary.version} ${project.version} - - - org.apache.spark - spark-streaming_${scala.binary.version} - ${project.version} - test-jar - test + provided ${akka.group} diff --git a/external/zeromq/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java 
b/external/zeromq/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java new file mode 100644 index 0000000000000..6e1f01900071b --- /dev/null +++ b/external/zeromq/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming; + +import org.apache.spark.streaming.api.java.JavaStreamingContext; +import org.junit.After; +import org.junit.Before; + +public abstract class LocalJavaStreamingContext { + + protected transient JavaStreamingContext ssc; + + @Before + public void setUp() { + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock"); + ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000)); + ssc.checkpoint("checkpoint"); + } + + @After + public void tearDown() { + ssc.stop(); + ssc = null; + } +} diff --git a/external/zeromq/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/zeromq/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala new file mode 100644 index 0000000000000..76b3b73a2ff3b --- /dev/null +++ b/external/zeromq/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -0,0 +1,413 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import java.io.{ObjectInputStream, IOException} + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.SynchronizedBuffer +import scala.reflect.ClassTag + +import org.scalatest.{BeforeAndAfter, FunSuite} + +import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} +import org.apache.spark.streaming.util.ManualClock +import org.apache.spark.{SparkConf, Logging} +import org.apache.spark.rdd.RDD +import org.apache.spark.util.Utils + +/** + * This is a input stream just for the testsuites. This is equivalent to a checkpointable, + * replayable, reliable message queue like Kafka. 
It requires a sequence as input, and + * returns the i_th element at the i_th batch unde manual clock. + */ +class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) + extends InputDStream[T](ssc_) { + + def start() {} + + def stop() {} + + def compute(validTime: Time): Option[RDD[T]] = { + logInfo("Computing RDD for time " + validTime) + val index = ((validTime - zeroTime) / slideDuration - 1).toInt + val selectedInput = if (index < input.size) input(index) else Seq[T]() + + // lets us test cases where RDDs are not created + if (selectedInput == null) + return None + + val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) + logInfo("Created RDD " + rdd.id + " with " + selectedInput) + Some(rdd) + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. + * + * The buffer contains a sequence of RDD's, each containing a sequence of items + */ +class TestOutputStream[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.collect() + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } +} + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. + * + * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each + * containing a sequence of items. + */ +class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.glom().collect().map(_.toSeq) + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } + + def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) +} + +/** + * This is the base trait for Spark Streaming testsuites. This provides basic functionality + * to run user-defined set of input on user-defined stream operations, and verify the output. + */ +trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { + + // Name of the framework for Spark context + def framework = this.getClass.getSimpleName + + // Master for Spark context + def master = "local[2]" + + // Batch duration + def batchDuration = Seconds(1) + + // Directory where the checkpoint data will be saved + lazy val checkpointDir = { + val dir = Utils.createTempDir() + logDebug(s"checkpointDir: $dir") + dir.toString + } + + // Number of partitions of the input parallel collections created for testing + def numInputPartitions = 2 + + // Maximum time to wait before the test times out + def maxWaitTimeMillis = 10000 + + // Whether to use manual clock or not + def useManualClock = true + + // Whether to actually wait in real time before changing manual clock + def actuallyWait = false + + //// A SparkConf to use in tests. 
Can be modified before calling setupStreams to configure things. + val conf = new SparkConf() + .setMaster(master) + .setAppName(framework) + + // Default before function for any streaming test suite. Override this + // if you want to add your stuff to "before" (i.e., don't call before { } ) + def beforeFunction() { + if (useManualClock) { + logInfo("Using manual clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") + } else { + logInfo("Using real clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") + } + } + + // Default after function for any streaming test suite. Override this + // if you want to add your stuff to "after" (i.e., don't call after { } ) + def afterFunction() { + System.clearProperty("spark.streaming.clock") + } + + before(beforeFunction) + after(afterFunction) + + /** + * Run a block of code with the given StreamingContext and automatically + * stop the context when the block completes or when an exception is thrown. + */ + def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { + try { + block(ssc) + } finally { + try { + ssc.stop(stopSparkContext = true) + } catch { + case e: Exception => + logError("Error stopping StreamingContext", e) + } + } + } + + /** + * Set up required DStreams to test the DStream operation using the two sequences + * of input collections. + */ + def setupStreams[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + numPartitions: Int = numInputPartitions + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream = new TestInputStream(ssc, input, numPartitions) + val operatedStream = operation(inputStream) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) + outputStream.register() + ssc + } + + /** + * Set up required DStreams to test the binary operation using the sequence + * of input collections. + */ + def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W] + ): StreamingContext = { + // Create StreamingContext + val ssc = new StreamingContext(conf, batchDuration) + if (checkpointDir != null) { + ssc.checkpoint(checkpointDir) + } + + // Setup the stream computation + val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) + val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) + val operatedStream = operation(inputStream1, inputStream2) + val outputStream = new TestOutputStreamWithPartitions(operatedStream, + new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) + outputStream.register() + ssc + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of items for each RDD. 
+ */ + def runStreams[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[V]] = { + // Flatten each RDD into a single Seq + runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) + } + + /** + * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and + * returns the collected output. It will wait until `numExpectedOutput` number of + * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. + * + * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each + * representing one partition. + */ + def runStreamsWithPartitions[V: ClassTag]( + ssc: StreamingContext, + numBatches: Int, + numExpectedOutput: Int + ): Seq[Seq[Seq[V]]] = { + assert(numBatches > 0, "Number of batches to run stream computation is zero") + assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") + logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) + + // Get the output buffer + val outputStream = ssc.graph.getOutputStreams. + filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). + head.asInstanceOf[TestOutputStreamWithPartitions[V]] + val output = outputStream.output + + try { + // Start computation + ssc.start() + + // Advance manual clock + val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + logInfo("Manual clock before advancing = " + clock.time) + if (actuallyWait) { + for (i <- 1 to numBatches) { + logInfo("Actually waiting for " + batchDuration) + clock.addToTime(batchDuration.milliseconds) + Thread.sleep(batchDuration.milliseconds) + } + } else { + clock.addToTime(numBatches * batchDuration.milliseconds) + } + logInfo("Manual clock after advancing = " + clock.time) + + // Wait until expected number of output items have been generated + val startTime = System.currentTimeMillis() + while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { + logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) + ssc.awaitTermination(50) + } + val timeTaken = System.currentTimeMillis() - startTime + logInfo("Output generated in " + timeTaken + " milliseconds") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") + assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") + + Thread.sleep(100) // Give some time for the forgetting old RDDs to complete + } finally { + ssc.stop(stopSparkContext = true) + } + output + } + + /** + * Verify whether the output values after running a DStream operation + * is same as the expected output values, by comparing the output + * collections either as lists (order matters) or sets (order does not matter) + */ + def verifyOutput[V: ClassTag]( + output: Seq[Seq[V]], + expectedOutput: Seq[Seq[V]], + useSet: Boolean + ) { + logInfo("--------------------------------") + logInfo("output.size = " + output.size) + logInfo("output") + output.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("expected output.size = " + expectedOutput.size) + logInfo("expected output") + expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("--------------------------------") + + // Match the output with the expected output + assert(output.size === expectedOutput.size, "Number of outputs do not match") + for (i <- 0 until output.size) { + if (useSet) { + 
assert(output(i).toSet === expectedOutput(i).toSet) + } else { + assert(output(i).toList === expectedOutput(i).toList) + } + } + logInfo("Output verified successfully") + } + + /** + * Test unary DStream operation with a list of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + useSet: Boolean = false + ) { + testOperation[U, V](input, operation, expectedOutput, -1, useSet) + } + + /** + * Test unary DStream operation with a list of inputs + * @param input Sequence of input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag]( + input: Seq[Seq[U]], + operation: DStream[U] => DStream[V], + expectedOutput: Seq[Seq[V]], + numBatches: Int, + useSet: Boolean + ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V](input, operation)) { ssc => + val output = runStreams[V](ssc, numBatches_, expectedOutput.size) + verifyOutput[V](output, expectedOutput, useSet) + } + } + + /** + * Test binary DStream operation with two lists of inputs, with number of + * batches to run same as the number of expected output values + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + useSet: Boolean + ) { + testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) + } + + /** + * Test binary DStream operation with two lists of inputs + * @param input1 First sequence of input collections + * @param input2 Second sequence of input collections + * @param operation Binary DStream operation to be applied to the 2 inputs + * @param expectedOutput Sequence of expected output collections + * @param numBatches Number of batches to run the operation for + * @param useSet Compare the output values with the expected output values + * as sets (order matters) or as lists (order does not matter) + */ + def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( + input1: Seq[Seq[U]], + input2: Seq[Seq[V]], + operation: (DStream[U], DStream[V]) => DStream[W], + expectedOutput: Seq[Seq[W]], + numBatches: Int, + useSet: Boolean + ) { + val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size + withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => + val output = runStreams[W](ssc, numBatches_, expectedOutput.size) + verifyOutput[W](output, expectedOutput, useSet) + } + } +} From 270b4fbcc4ba07860fc08f3432b248693a097171 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 19 Nov 2014 14:21:00 +0530 Subject: [PATCH 2/3] Removed most of the unused code. 
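
With spark-streaming marked as `provided`, the external modules can no longer rely on the
spark-streaming test-jar, so the per-module copies of TestSuiteBase.scala are deleted again and
only a small TestOutputStream helper is kept (under external/flume). Each suite now builds its
own SparkConf and StreamingContext and collects batch output through that helper. A minimal
sketch of the pattern follows; the suite name and the queue-backed input stream are illustrative
only and not part of this patch:

    package org.apache.spark.streaming.flume

    import scala.collection.mutable.{ArrayBuffer, Queue, SynchronizedBuffer}

    import org.scalatest.FunSuite

    import org.apache.spark.SparkConf
    import org.apache.spark.rdd.RDD
    import org.apache.spark.streaming.{Seconds, StreamingContext, TestOutputStream}

    // Illustrative only: how a suite can collect output with TestOutputStream
    // instead of extending the removed TestSuiteBase copy.
    class ExampleStreamSuite extends FunSuite {

      test("collect DStream output with TestOutputStream") {
        val conf = new SparkConf().setMaster("local[2]").setAppName("ExampleStreamSuite")
        val ssc = new StreamingContext(conf, Seconds(1))
        try {
          // Any DStream would do; a queue-backed stream keeps the example self-contained.
          val queue = new Queue[RDD[Int]]()
          queue += ssc.sparkContext.makeRDD(1 to 10, 2)
          val stream = ssc.queueStream(queue)

          // Collect each batch into a thread-safe buffer, one Seq per RDD.
          val outputBuffer = new ArrayBuffer[Seq[Int]] with SynchronizedBuffer[Seq[Int]]
          new TestOutputStream(stream, outputBuffer).register()

          ssc.start()
          // Poll until the expected items arrive or a 10 second budget is spent.
          val startTime = System.currentTimeMillis()
          while (outputBuffer.flatten.size < 10 &&
              System.currentTimeMillis() - startTime < 10000) {
            Thread.sleep(100)
          }
          assert(outputBuffer.flatten.toSet === (1 to 10).toSet)
        } finally {
          ssc.stop(stopSparkContext = true)
        }
      }
    }

TestOutputStream still clears its buffer in readObject, so suites that restore from a checkpoint
continue to start from an empty buffer without needing the rest of TestSuiteBase.
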
--- .../spark/streaming/TestOutputStream.scala | 48 ++ .../spark/streaming/TestSuiteBase.scala | 413 ------------------ .../flume/FlumePollingStreamSuite.scala | 15 +- .../spark/streaming/TestSuiteBase.scala | 413 ------------------ .../spark/streaming/TestSuiteBase.scala | 413 ------------------ .../streaming/mqtt/MQTTStreamSuite.scala | 12 +- .../spark/streaming/TestSuiteBase.scala | 413 ------------------ .../twitter/TwitterStreamSuite.scala | 18 +- .../spark/streaming/TestSuiteBase.scala | 413 ------------------ .../streaming/zeromq/ZeroMQStreamSuite.scala | 11 +- 10 files changed, 91 insertions(+), 2078 deletions(-) create mode 100644 external/flume/src/test/scala/org/apache/spark/streaming/TestOutputStream.scala delete mode 100644 external/flume/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala delete mode 100644 external/kafka/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala delete mode 100644 external/mqtt/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala delete mode 100644 external/twitter/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala delete mode 100644 external/zeromq/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala diff --git a/external/flume/src/test/scala/org/apache/spark/streaming/TestOutputStream.scala b/external/flume/src/test/scala/org/apache/spark/streaming/TestOutputStream.scala new file mode 100644 index 0000000000000..1a900007b696b --- /dev/null +++ b/external/flume/src/test/scala/org/apache/spark/streaming/TestOutputStream.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import java.io.{IOException, ObjectInputStream} + +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming.dstream.{DStream, ForEachDStream} +import org.apache.spark.util.Utils + +import scala.collection.mutable.ArrayBuffer +import scala.reflect.ClassTag + +/** + * This is a output stream just for the testsuites. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. 
+ * + * The buffer contains a sequence of RDD's, each containing a sequence of items + */ +class TestOutputStream[T: ClassTag](parent: DStream[T], + val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) + extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { + val collected = rdd.collect() + output += collected + }) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + output.clear() + } +} diff --git a/external/flume/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/flume/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala deleted file mode 100644 index 76b3b73a2ff3b..0000000000000 --- a/external/flume/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.streaming - -import java.io.{ObjectInputStream, IOException} - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.SynchronizedBuffer -import scala.reflect.ClassTag - -import org.scalatest.{BeforeAndAfter, FunSuite} - -import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} -import org.apache.spark.streaming.util.ManualClock -import org.apache.spark.{SparkConf, Logging} -import org.apache.spark.rdd.RDD -import org.apache.spark.util.Utils - -/** - * This is a input stream just for the testsuites. This is equivalent to a checkpointable, - * replayable, reliable message queue like Kafka. It requires a sequence as input, and - * returns the i_th element at the i_th batch unde manual clock. - */ -class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) - extends InputDStream[T](ssc_) { - - def start() {} - - def stop() {} - - def compute(validTime: Time): Option[RDD[T]] = { - logInfo("Computing RDD for time " + validTime) - val index = ((validTime - zeroTime) / slideDuration - 1).toInt - val selectedInput = if (index < input.size) input(index) else Seq[T]() - - // lets us test cases where RDDs are not created - if (selectedInput == null) - return None - - val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) - logInfo("Created RDD " + rdd.id + " with " + selectedInput) - Some(rdd) - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. 
- * - * The buffer contains a sequence of RDD's, each containing a sequence of items - */ -class TestOutputStream[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.collect() - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. - * - * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each - * containing a sequence of items. - */ -class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.glom().collect().map(_.toSeq) - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } - - def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) -} - -/** - * This is the base trait for Spark Streaming testsuites. This provides basic functionality - * to run user-defined set of input on user-defined stream operations, and verify the output. - */ -trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { - - // Name of the framework for Spark context - def framework = this.getClass.getSimpleName - - // Master for Spark context - def master = "local[2]" - - // Batch duration - def batchDuration = Seconds(1) - - // Directory where the checkpoint data will be saved - lazy val checkpointDir = { - val dir = Utils.createTempDir() - logDebug(s"checkpointDir: $dir") - dir.toString - } - - // Number of partitions of the input parallel collections created for testing - def numInputPartitions = 2 - - // Maximum time to wait before the test times out - def maxWaitTimeMillis = 10000 - - // Whether to use manual clock or not - def useManualClock = true - - // Whether to actually wait in real time before changing manual clock - def actuallyWait = false - - //// A SparkConf to use in tests. Can be modified before calling setupStreams to configure things. - val conf = new SparkConf() - .setMaster(master) - .setAppName(framework) - - // Default before function for any streaming test suite. Override this - // if you want to add your stuff to "before" (i.e., don't call before { } ) - def beforeFunction() { - if (useManualClock) { - logInfo("Using manual clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") - } else { - logInfo("Using real clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") - } - } - - // Default after function for any streaming test suite. 
Override this - // if you want to add your stuff to "after" (i.e., don't call after { } ) - def afterFunction() { - System.clearProperty("spark.streaming.clock") - } - - before(beforeFunction) - after(afterFunction) - - /** - * Run a block of code with the given StreamingContext and automatically - * stop the context when the block completes or when an exception is thrown. - */ - def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { - try { - block(ssc) - } finally { - try { - ssc.stop(stopSparkContext = true) - } catch { - case e: Exception => - logError("Error stopping StreamingContext", e) - } - } - } - - /** - * Set up required DStreams to test the DStream operation using the two sequences - * of input collections. - */ - def setupStreams[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - numPartitions: Int = numInputPartitions - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream = new TestInputStream(ssc, input, numPartitions) - val operatedStream = operation(inputStream) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) - outputStream.register() - ssc - } - - /** - * Set up required DStreams to test the binary operation using the sequence - * of input collections. - */ - def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W] - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) - val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) - val operatedStream = operation(inputStream1, inputStream2) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) - outputStream.register() - ssc - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of items for each RDD. - */ - def runStreams[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[V]] = { - // Flatten each RDD into a single Seq - runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each - * representing one partition. 
- */ - def runStreamsWithPartitions[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[Seq[V]]] = { - assert(numBatches > 0, "Number of batches to run stream computation is zero") - assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") - logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) - - // Get the output buffer - val outputStream = ssc.graph.getOutputStreams. - filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). - head.asInstanceOf[TestOutputStreamWithPartitions[V]] - val output = outputStream.output - - try { - // Start computation - ssc.start() - - // Advance manual clock - val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - logInfo("Manual clock before advancing = " + clock.time) - if (actuallyWait) { - for (i <- 1 to numBatches) { - logInfo("Actually waiting for " + batchDuration) - clock.addToTime(batchDuration.milliseconds) - Thread.sleep(batchDuration.milliseconds) - } - } else { - clock.addToTime(numBatches * batchDuration.milliseconds) - } - logInfo("Manual clock after advancing = " + clock.time) - - // Wait until expected number of output items have been generated - val startTime = System.currentTimeMillis() - while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { - logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) - ssc.awaitTermination(50) - } - val timeTaken = System.currentTimeMillis() - startTime - logInfo("Output generated in " + timeTaken + " milliseconds") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") - assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") - - Thread.sleep(100) // Give some time for the forgetting old RDDs to complete - } finally { - ssc.stop(stopSparkContext = true) - } - output - } - - /** - * Verify whether the output values after running a DStream operation - * is same as the expected output values, by comparing the output - * collections either as lists (order matters) or sets (order does not matter) - */ - def verifyOutput[V: ClassTag]( - output: Seq[Seq[V]], - expectedOutput: Seq[Seq[V]], - useSet: Boolean - ) { - logInfo("--------------------------------") - logInfo("output.size = " + output.size) - logInfo("output") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("expected output.size = " + expectedOutput.size) - logInfo("expected output") - expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("--------------------------------") - - // Match the output with the expected output - assert(output.size === expectedOutput.size, "Number of outputs do not match") - for (i <- 0 until output.size) { - if (useSet) { - assert(output(i).toSet === expectedOutput(i).toSet) - } else { - assert(output(i).toList === expectedOutput(i).toList) - } - } - logInfo("Output verified successfully") - } - - /** - * Test unary DStream operation with a list of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - useSet: Boolean = false - ) { - testOperation[U, V](input, operation, expectedOutput, -1, useSet) - } - - /** - * Test unary DStream operation with a list of inputs - * @param input Sequence of 
input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - numBatches: Int, - useSet: Boolean - ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V](input, operation)) { ssc => - val output = runStreams[V](ssc, numBatches_, expectedOutput.size) - verifyOutput[V](output, expectedOutput, useSet) - } - } - - /** - * Test binary DStream operation with two lists of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - useSet: Boolean - ) { - testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) - } - - /** - * Test binary DStream operation with two lists of inputs - * @param input1 First sequence of input collections - * @param input2 Second sequence of input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - numBatches: Int, - useSet: Boolean - ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => - val output = runStreams[W](ssc, numBatches_, expectedOutput.size) - verifyOutput[W](output, expectedOutput, useSet) - } - } -} diff --git a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala index 475026e8eb140..ba079d84e4529 100644 --- a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala +++ b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala @@ -20,9 +20,6 @@ package org.apache.spark.streaming.flume import java.net.InetSocketAddress import java.util.concurrent.{Callable, ExecutorCompletionService, Executors} -import java.util.Random - -import org.apache.spark.TestUtils import scala.collection.JavaConversions._ import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer} @@ -32,20 +29,28 @@ import org.apache.flume.channel.MemoryChannel import org.apache.flume.conf.Configurables import org.apache.flume.event.EventBuilder +import org.scalatest.{BeforeAndAfter, FunSuite} + +import org.apache.spark.{SparkConf, Logging} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.dstream.ReceiverInputDStream import org.apache.spark.streaming.util.ManualClock -import 
org.apache.spark.streaming.{TestSuiteBase, TestOutputStream, StreamingContext} +import org.apache.spark.streaming.{Seconds, TestOutputStream, StreamingContext} import org.apache.spark.streaming.flume.sink._ import org.apache.spark.util.Utils -class FlumePollingStreamSuite extends TestSuiteBase { +class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging { val batchCount = 5 val eventsPerBatch = 100 val totalEventsPerChannel = batchCount * eventsPerBatch val channelCapacity = 5000 val maxAttempts = 5 + val batchDuration = Seconds(1) + + val conf = new SparkConf() + .setMaster("local[2]") + .setAppName(this.getClass.getSimpleName) test("flume polling test") { testMultipleTimes(testFlumePolling) diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/kafka/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala deleted file mode 100644 index 76b3b73a2ff3b..0000000000000 --- a/external/kafka/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.streaming - -import java.io.{ObjectInputStream, IOException} - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.SynchronizedBuffer -import scala.reflect.ClassTag - -import org.scalatest.{BeforeAndAfter, FunSuite} - -import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} -import org.apache.spark.streaming.util.ManualClock -import org.apache.spark.{SparkConf, Logging} -import org.apache.spark.rdd.RDD -import org.apache.spark.util.Utils - -/** - * This is a input stream just for the testsuites. This is equivalent to a checkpointable, - * replayable, reliable message queue like Kafka. It requires a sequence as input, and - * returns the i_th element at the i_th batch unde manual clock. - */ -class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) - extends InputDStream[T](ssc_) { - - def start() {} - - def stop() {} - - def compute(validTime: Time): Option[RDD[T]] = { - logInfo("Computing RDD for time " + validTime) - val index = ((validTime - zeroTime) / slideDuration - 1).toInt - val selectedInput = if (index < input.size) input(index) else Seq[T]() - - // lets us test cases where RDDs are not created - if (selectedInput == null) - return None - - val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) - logInfo("Created RDD " + rdd.id + " with " + selectedInput) - Some(rdd) - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. 
- * - * The buffer contains a sequence of RDD's, each containing a sequence of items - */ -class TestOutputStream[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.collect() - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. - * - * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each - * containing a sequence of items. - */ -class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.glom().collect().map(_.toSeq) - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } - - def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) -} - -/** - * This is the base trait for Spark Streaming testsuites. This provides basic functionality - * to run user-defined set of input on user-defined stream operations, and verify the output. - */ -trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { - - // Name of the framework for Spark context - def framework = this.getClass.getSimpleName - - // Master for Spark context - def master = "local[2]" - - // Batch duration - def batchDuration = Seconds(1) - - // Directory where the checkpoint data will be saved - lazy val checkpointDir = { - val dir = Utils.createTempDir() - logDebug(s"checkpointDir: $dir") - dir.toString - } - - // Number of partitions of the input parallel collections created for testing - def numInputPartitions = 2 - - // Maximum time to wait before the test times out - def maxWaitTimeMillis = 10000 - - // Whether to use manual clock or not - def useManualClock = true - - // Whether to actually wait in real time before changing manual clock - def actuallyWait = false - - //// A SparkConf to use in tests. Can be modified before calling setupStreams to configure things. - val conf = new SparkConf() - .setMaster(master) - .setAppName(framework) - - // Default before function for any streaming test suite. Override this - // if you want to add your stuff to "before" (i.e., don't call before { } ) - def beforeFunction() { - if (useManualClock) { - logInfo("Using manual clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") - } else { - logInfo("Using real clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") - } - } - - // Default after function for any streaming test suite. 
Override this - // if you want to add your stuff to "after" (i.e., don't call after { } ) - def afterFunction() { - System.clearProperty("spark.streaming.clock") - } - - before(beforeFunction) - after(afterFunction) - - /** - * Run a block of code with the given StreamingContext and automatically - * stop the context when the block completes or when an exception is thrown. - */ - def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { - try { - block(ssc) - } finally { - try { - ssc.stop(stopSparkContext = true) - } catch { - case e: Exception => - logError("Error stopping StreamingContext", e) - } - } - } - - /** - * Set up required DStreams to test the DStream operation using the two sequences - * of input collections. - */ - def setupStreams[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - numPartitions: Int = numInputPartitions - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream = new TestInputStream(ssc, input, numPartitions) - val operatedStream = operation(inputStream) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) - outputStream.register() - ssc - } - - /** - * Set up required DStreams to test the binary operation using the sequence - * of input collections. - */ - def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W] - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) - val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) - val operatedStream = operation(inputStream1, inputStream2) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) - outputStream.register() - ssc - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of items for each RDD. - */ - def runStreams[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[V]] = { - // Flatten each RDD into a single Seq - runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each - * representing one partition. 
- */ - def runStreamsWithPartitions[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[Seq[V]]] = { - assert(numBatches > 0, "Number of batches to run stream computation is zero") - assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") - logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) - - // Get the output buffer - val outputStream = ssc.graph.getOutputStreams. - filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). - head.asInstanceOf[TestOutputStreamWithPartitions[V]] - val output = outputStream.output - - try { - // Start computation - ssc.start() - - // Advance manual clock - val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - logInfo("Manual clock before advancing = " + clock.time) - if (actuallyWait) { - for (i <- 1 to numBatches) { - logInfo("Actually waiting for " + batchDuration) - clock.addToTime(batchDuration.milliseconds) - Thread.sleep(batchDuration.milliseconds) - } - } else { - clock.addToTime(numBatches * batchDuration.milliseconds) - } - logInfo("Manual clock after advancing = " + clock.time) - - // Wait until expected number of output items have been generated - val startTime = System.currentTimeMillis() - while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { - logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) - ssc.awaitTermination(50) - } - val timeTaken = System.currentTimeMillis() - startTime - logInfo("Output generated in " + timeTaken + " milliseconds") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") - assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") - - Thread.sleep(100) // Give some time for the forgetting old RDDs to complete - } finally { - ssc.stop(stopSparkContext = true) - } - output - } - - /** - * Verify whether the output values after running a DStream operation - * is same as the expected output values, by comparing the output - * collections either as lists (order matters) or sets (order does not matter) - */ - def verifyOutput[V: ClassTag]( - output: Seq[Seq[V]], - expectedOutput: Seq[Seq[V]], - useSet: Boolean - ) { - logInfo("--------------------------------") - logInfo("output.size = " + output.size) - logInfo("output") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("expected output.size = " + expectedOutput.size) - logInfo("expected output") - expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("--------------------------------") - - // Match the output with the expected output - assert(output.size === expectedOutput.size, "Number of outputs do not match") - for (i <- 0 until output.size) { - if (useSet) { - assert(output(i).toSet === expectedOutput(i).toSet) - } else { - assert(output(i).toList === expectedOutput(i).toList) - } - } - logInfo("Output verified successfully") - } - - /** - * Test unary DStream operation with a list of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - useSet: Boolean = false - ) { - testOperation[U, V](input, operation, expectedOutput, -1, useSet) - } - - /** - * Test unary DStream operation with a list of inputs - * @param input Sequence of 
input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - numBatches: Int, - useSet: Boolean - ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V](input, operation)) { ssc => - val output = runStreams[V](ssc, numBatches_, expectedOutput.size) - verifyOutput[V](output, expectedOutput, useSet) - } - } - - /** - * Test binary DStream operation with two lists of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - useSet: Boolean - ) { - testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) - } - - /** - * Test binary DStream operation with two lists of inputs - * @param input1 First sequence of input collections - * @param input2 Second sequence of input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - numBatches: Int, - useSet: Boolean - ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => - val output = runStreams[W](ssc, numBatches_, expectedOutput.size) - verifyOutput[W](output, expectedOutput, useSet) - } - } -} diff --git a/external/mqtt/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/mqtt/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala deleted file mode 100644 index 76b3b73a2ff3b..0000000000000 --- a/external/mqtt/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.streaming - -import java.io.{ObjectInputStream, IOException} - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.SynchronizedBuffer -import scala.reflect.ClassTag - -import org.scalatest.{BeforeAndAfter, FunSuite} - -import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} -import org.apache.spark.streaming.util.ManualClock -import org.apache.spark.{SparkConf, Logging} -import org.apache.spark.rdd.RDD -import org.apache.spark.util.Utils - -/** - * This is a input stream just for the testsuites. This is equivalent to a checkpointable, - * replayable, reliable message queue like Kafka. It requires a sequence as input, and - * returns the i_th element at the i_th batch unde manual clock. - */ -class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) - extends InputDStream[T](ssc_) { - - def start() {} - - def stop() {} - - def compute(validTime: Time): Option[RDD[T]] = { - logInfo("Computing RDD for time " + validTime) - val index = ((validTime - zeroTime) / slideDuration - 1).toInt - val selectedInput = if (index < input.size) input(index) else Seq[T]() - - // lets us test cases where RDDs are not created - if (selectedInput == null) - return None - - val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) - logInfo("Created RDD " + rdd.id + " with " + selectedInput) - Some(rdd) - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. - * - * The buffer contains a sequence of RDD's, each containing a sequence of items - */ -class TestOutputStream[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.collect() - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. - * - * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each - * containing a sequence of items. - */ -class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.glom().collect().map(_.toSeq) - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } - - def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) -} - -/** - * This is the base trait for Spark Streaming testsuites. This provides basic functionality - * to run user-defined set of input on user-defined stream operations, and verify the output. 
- */ -trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { - - // Name of the framework for Spark context - def framework = this.getClass.getSimpleName - - // Master for Spark context - def master = "local[2]" - - // Batch duration - def batchDuration = Seconds(1) - - // Directory where the checkpoint data will be saved - lazy val checkpointDir = { - val dir = Utils.createTempDir() - logDebug(s"checkpointDir: $dir") - dir.toString - } - - // Number of partitions of the input parallel collections created for testing - def numInputPartitions = 2 - - // Maximum time to wait before the test times out - def maxWaitTimeMillis = 10000 - - // Whether to use manual clock or not - def useManualClock = true - - // Whether to actually wait in real time before changing manual clock - def actuallyWait = false - - //// A SparkConf to use in tests. Can be modified before calling setupStreams to configure things. - val conf = new SparkConf() - .setMaster(master) - .setAppName(framework) - - // Default before function for any streaming test suite. Override this - // if you want to add your stuff to "before" (i.e., don't call before { } ) - def beforeFunction() { - if (useManualClock) { - logInfo("Using manual clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") - } else { - logInfo("Using real clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") - } - } - - // Default after function for any streaming test suite. Override this - // if you want to add your stuff to "after" (i.e., don't call after { } ) - def afterFunction() { - System.clearProperty("spark.streaming.clock") - } - - before(beforeFunction) - after(afterFunction) - - /** - * Run a block of code with the given StreamingContext and automatically - * stop the context when the block completes or when an exception is thrown. - */ - def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { - try { - block(ssc) - } finally { - try { - ssc.stop(stopSparkContext = true) - } catch { - case e: Exception => - logError("Error stopping StreamingContext", e) - } - } - } - - /** - * Set up required DStreams to test the DStream operation using the two sequences - * of input collections. - */ - def setupStreams[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - numPartitions: Int = numInputPartitions - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream = new TestInputStream(ssc, input, numPartitions) - val operatedStream = operation(inputStream) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) - outputStream.register() - ssc - } - - /** - * Set up required DStreams to test the binary operation using the sequence - * of input collections. 
- */ - def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W] - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) - val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) - val operatedStream = operation(inputStream1, inputStream2) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) - outputStream.register() - ssc - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of items for each RDD. - */ - def runStreams[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[V]] = { - // Flatten each RDD into a single Seq - runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each - * representing one partition. - */ - def runStreamsWithPartitions[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[Seq[V]]] = { - assert(numBatches > 0, "Number of batches to run stream computation is zero") - assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") - logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) - - // Get the output buffer - val outputStream = ssc.graph.getOutputStreams. - filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). 
- head.asInstanceOf[TestOutputStreamWithPartitions[V]] - val output = outputStream.output - - try { - // Start computation - ssc.start() - - // Advance manual clock - val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - logInfo("Manual clock before advancing = " + clock.time) - if (actuallyWait) { - for (i <- 1 to numBatches) { - logInfo("Actually waiting for " + batchDuration) - clock.addToTime(batchDuration.milliseconds) - Thread.sleep(batchDuration.milliseconds) - } - } else { - clock.addToTime(numBatches * batchDuration.milliseconds) - } - logInfo("Manual clock after advancing = " + clock.time) - - // Wait until expected number of output items have been generated - val startTime = System.currentTimeMillis() - while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { - logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) - ssc.awaitTermination(50) - } - val timeTaken = System.currentTimeMillis() - startTime - logInfo("Output generated in " + timeTaken + " milliseconds") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") - assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") - - Thread.sleep(100) // Give some time for the forgetting old RDDs to complete - } finally { - ssc.stop(stopSparkContext = true) - } - output - } - - /** - * Verify whether the output values after running a DStream operation - * is same as the expected output values, by comparing the output - * collections either as lists (order matters) or sets (order does not matter) - */ - def verifyOutput[V: ClassTag]( - output: Seq[Seq[V]], - expectedOutput: Seq[Seq[V]], - useSet: Boolean - ) { - logInfo("--------------------------------") - logInfo("output.size = " + output.size) - logInfo("output") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("expected output.size = " + expectedOutput.size) - logInfo("expected output") - expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("--------------------------------") - - // Match the output with the expected output - assert(output.size === expectedOutput.size, "Number of outputs do not match") - for (i <- 0 until output.size) { - if (useSet) { - assert(output(i).toSet === expectedOutput(i).toSet) - } else { - assert(output(i).toList === expectedOutput(i).toList) - } - } - logInfo("Output verified successfully") - } - - /** - * Test unary DStream operation with a list of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - useSet: Boolean = false - ) { - testOperation[U, V](input, operation, expectedOutput, -1, useSet) - } - - /** - * Test unary DStream operation with a list of inputs - * @param input Sequence of input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - numBatches: Int, - useSet: Boolean 
- ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V](input, operation)) { ssc => - val output = runStreams[V](ssc, numBatches_, expectedOutput.size) - verifyOutput[V](output, expectedOutput, useSet) - } - } - - /** - * Test binary DStream operation with two lists of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - useSet: Boolean - ) { - testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) - } - - /** - * Test binary DStream operation with two lists of inputs - * @param input1 First sequence of input collections - * @param input2 Second sequence of input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - numBatches: Int, - useSet: Boolean - ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => - val output = runStreams[W](ssc, numBatches_, expectedOutput.size) - verifyOutput[W](output, expectedOutput, useSet) - } - } -} diff --git a/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala b/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala index 467fd263e2d64..84595acf45ccb 100644 --- a/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala +++ b/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala @@ -17,11 +17,19 @@ package org.apache.spark.streaming.mqtt -import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} +import org.scalatest.FunSuite + +import org.apache.spark.streaming.{Seconds, StreamingContext} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.dstream.ReceiverInputDStream -class MQTTStreamSuite extends TestSuiteBase { +class MQTTStreamSuite extends FunSuite { + + val batchDuration = Seconds(1) + + private val master: String = "local[2]" + + private val framework: String = this.getClass.getSimpleName test("mqtt input stream") { val ssc = new StreamingContext(master, framework, batchDuration) diff --git a/external/twitter/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/twitter/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala deleted file mode 100644 index 76b3b73a2ff3b..0000000000000 --- a/external/twitter/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.streaming - -import java.io.{ObjectInputStream, IOException} - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.SynchronizedBuffer -import scala.reflect.ClassTag - -import org.scalatest.{BeforeAndAfter, FunSuite} - -import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} -import org.apache.spark.streaming.util.ManualClock -import org.apache.spark.{SparkConf, Logging} -import org.apache.spark.rdd.RDD -import org.apache.spark.util.Utils - -/** - * This is a input stream just for the testsuites. This is equivalent to a checkpointable, - * replayable, reliable message queue like Kafka. It requires a sequence as input, and - * returns the i_th element at the i_th batch unde manual clock. - */ -class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) - extends InputDStream[T](ssc_) { - - def start() {} - - def stop() {} - - def compute(validTime: Time): Option[RDD[T]] = { - logInfo("Computing RDD for time " + validTime) - val index = ((validTime - zeroTime) / slideDuration - 1).toInt - val selectedInput = if (index < input.size) input(index) else Seq[T]() - - // lets us test cases where RDDs are not created - if (selectedInput == null) - return None - - val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) - logInfo("Created RDD " + rdd.id + " with " + selectedInput) - Some(rdd) - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. - * - * The buffer contains a sequence of RDD's, each containing a sequence of items - */ -class TestOutputStream[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.collect() - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. - * - * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each - * containing a sequence of items. 
- */ -class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.glom().collect().map(_.toSeq) - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } - - def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) -} - -/** - * This is the base trait for Spark Streaming testsuites. This provides basic functionality - * to run user-defined set of input on user-defined stream operations, and verify the output. - */ -trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { - - // Name of the framework for Spark context - def framework = this.getClass.getSimpleName - - // Master for Spark context - def master = "local[2]" - - // Batch duration - def batchDuration = Seconds(1) - - // Directory where the checkpoint data will be saved - lazy val checkpointDir = { - val dir = Utils.createTempDir() - logDebug(s"checkpointDir: $dir") - dir.toString - } - - // Number of partitions of the input parallel collections created for testing - def numInputPartitions = 2 - - // Maximum time to wait before the test times out - def maxWaitTimeMillis = 10000 - - // Whether to use manual clock or not - def useManualClock = true - - // Whether to actually wait in real time before changing manual clock - def actuallyWait = false - - //// A SparkConf to use in tests. Can be modified before calling setupStreams to configure things. - val conf = new SparkConf() - .setMaster(master) - .setAppName(framework) - - // Default before function for any streaming test suite. Override this - // if you want to add your stuff to "before" (i.e., don't call before { } ) - def beforeFunction() { - if (useManualClock) { - logInfo("Using manual clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") - } else { - logInfo("Using real clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") - } - } - - // Default after function for any streaming test suite. Override this - // if you want to add your stuff to "after" (i.e., don't call after { } ) - def afterFunction() { - System.clearProperty("spark.streaming.clock") - } - - before(beforeFunction) - after(afterFunction) - - /** - * Run a block of code with the given StreamingContext and automatically - * stop the context when the block completes or when an exception is thrown. - */ - def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { - try { - block(ssc) - } finally { - try { - ssc.stop(stopSparkContext = true) - } catch { - case e: Exception => - logError("Error stopping StreamingContext", e) - } - } - } - - /** - * Set up required DStreams to test the DStream operation using the two sequences - * of input collections. 
- */ - def setupStreams[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - numPartitions: Int = numInputPartitions - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream = new TestInputStream(ssc, input, numPartitions) - val operatedStream = operation(inputStream) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) - outputStream.register() - ssc - } - - /** - * Set up required DStreams to test the binary operation using the sequence - * of input collections. - */ - def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W] - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) - val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) - val operatedStream = operation(inputStream1, inputStream2) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) - outputStream.register() - ssc - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of items for each RDD. - */ - def runStreams[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[V]] = { - // Flatten each RDD into a single Seq - runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each - * representing one partition. - */ - def runStreamsWithPartitions[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[Seq[V]]] = { - assert(numBatches > 0, "Number of batches to run stream computation is zero") - assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") - logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) - - // Get the output buffer - val outputStream = ssc.graph.getOutputStreams. - filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). 
- head.asInstanceOf[TestOutputStreamWithPartitions[V]] - val output = outputStream.output - - try { - // Start computation - ssc.start() - - // Advance manual clock - val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - logInfo("Manual clock before advancing = " + clock.time) - if (actuallyWait) { - for (i <- 1 to numBatches) { - logInfo("Actually waiting for " + batchDuration) - clock.addToTime(batchDuration.milliseconds) - Thread.sleep(batchDuration.milliseconds) - } - } else { - clock.addToTime(numBatches * batchDuration.milliseconds) - } - logInfo("Manual clock after advancing = " + clock.time) - - // Wait until expected number of output items have been generated - val startTime = System.currentTimeMillis() - while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { - logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) - ssc.awaitTermination(50) - } - val timeTaken = System.currentTimeMillis() - startTime - logInfo("Output generated in " + timeTaken + " milliseconds") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") - assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") - - Thread.sleep(100) // Give some time for the forgetting old RDDs to complete - } finally { - ssc.stop(stopSparkContext = true) - } - output - } - - /** - * Verify whether the output values after running a DStream operation - * is same as the expected output values, by comparing the output - * collections either as lists (order matters) or sets (order does not matter) - */ - def verifyOutput[V: ClassTag]( - output: Seq[Seq[V]], - expectedOutput: Seq[Seq[V]], - useSet: Boolean - ) { - logInfo("--------------------------------") - logInfo("output.size = " + output.size) - logInfo("output") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("expected output.size = " + expectedOutput.size) - logInfo("expected output") - expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("--------------------------------") - - // Match the output with the expected output - assert(output.size === expectedOutput.size, "Number of outputs do not match") - for (i <- 0 until output.size) { - if (useSet) { - assert(output(i).toSet === expectedOutput(i).toSet) - } else { - assert(output(i).toList === expectedOutput(i).toList) - } - } - logInfo("Output verified successfully") - } - - /** - * Test unary DStream operation with a list of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - useSet: Boolean = false - ) { - testOperation[U, V](input, operation, expectedOutput, -1, useSet) - } - - /** - * Test unary DStream operation with a list of inputs - * @param input Sequence of input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - numBatches: Int, - useSet: Boolean 
- ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V](input, operation)) { ssc => - val output = runStreams[V](ssc, numBatches_, expectedOutput.size) - verifyOutput[V](output, expectedOutput, useSet) - } - } - - /** - * Test binary DStream operation with two lists of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - useSet: Boolean - ) { - testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) - } - - /** - * Test binary DStream operation with two lists of inputs - * @param input1 First sequence of input collections - * @param input2 Second sequence of input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - numBatches: Int, - useSet: Boolean - ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => - val output = runStreams[W](ssc, numBatches_, expectedOutput.size) - verifyOutput[W](output, expectedOutput, useSet) - } - } -} diff --git a/external/twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala b/external/twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala index 93741e0375164..9ee57d7581d85 100644 --- a/external/twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala +++ b/external/twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala @@ -17,13 +17,23 @@ package org.apache.spark.streaming.twitter -import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} -import org.apache.spark.storage.StorageLevel + +import org.scalatest.{BeforeAndAfter, FunSuite} +import twitter4j.Status import twitter4j.auth.{NullAuthorization, Authorization} + +import org.apache.spark.Logging +import org.apache.spark.streaming.{Seconds, StreamingContext} +import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.dstream.ReceiverInputDStream -import twitter4j.Status -class TwitterStreamSuite extends TestSuiteBase { +class TwitterStreamSuite extends FunSuite with BeforeAndAfter with Logging { + + val batchDuration = Seconds(1) + + private val master: String = "local[2]" + + private val framework: String = this.getClass.getSimpleName test("twitter input stream") { val ssc = new StreamingContext(master, framework, batchDuration) diff --git a/external/zeromq/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/external/zeromq/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala deleted file mode 100644 index 76b3b73a2ff3b..0000000000000 --- a/external/zeromq/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.streaming - -import java.io.{ObjectInputStream, IOException} - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.SynchronizedBuffer -import scala.reflect.ClassTag - -import org.scalatest.{BeforeAndAfter, FunSuite} - -import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream} -import org.apache.spark.streaming.util.ManualClock -import org.apache.spark.{SparkConf, Logging} -import org.apache.spark.rdd.RDD -import org.apache.spark.util.Utils - -/** - * This is a input stream just for the testsuites. This is equivalent to a checkpointable, - * replayable, reliable message queue like Kafka. It requires a sequence as input, and - * returns the i_th element at the i_th batch unde manual clock. - */ -class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], numPartitions: Int) - extends InputDStream[T](ssc_) { - - def start() {} - - def stop() {} - - def compute(validTime: Time): Option[RDD[T]] = { - logInfo("Computing RDD for time " + validTime) - val index = ((validTime - zeroTime) / slideDuration - 1).toInt - val selectedInput = if (index < input.size) input(index) else Seq[T]() - - // lets us test cases where RDDs are not created - if (selectedInput == null) - return None - - val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) - logInfo("Created RDD " + rdd.id + " with " + selectedInput) - Some(rdd) - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. - * - * The buffer contains a sequence of RDD's, each containing a sequence of items - */ -class TestOutputStream[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.collect() - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } -} - -/** - * This is a output stream just for the testsuites. All the output is collected into a - * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. - * - * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each - * containing a sequence of items. 
- */ -class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { - val collected = rdd.glom().collect().map(_.toSeq) - output += collected - }) { - - // This is to clear the output buffer every it is read from a checkpoint - @throws(classOf[IOException]) - private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { - ois.defaultReadObject() - output.clear() - } - - def toTestOutputStream = new TestOutputStream[T](this.parent, this.output.map(_.flatten)) -} - -/** - * This is the base trait for Spark Streaming testsuites. This provides basic functionality - * to run user-defined set of input on user-defined stream operations, and verify the output. - */ -trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { - - // Name of the framework for Spark context - def framework = this.getClass.getSimpleName - - // Master for Spark context - def master = "local[2]" - - // Batch duration - def batchDuration = Seconds(1) - - // Directory where the checkpoint data will be saved - lazy val checkpointDir = { - val dir = Utils.createTempDir() - logDebug(s"checkpointDir: $dir") - dir.toString - } - - // Number of partitions of the input parallel collections created for testing - def numInputPartitions = 2 - - // Maximum time to wait before the test times out - def maxWaitTimeMillis = 10000 - - // Whether to use manual clock or not - def useManualClock = true - - // Whether to actually wait in real time before changing manual clock - def actuallyWait = false - - //// A SparkConf to use in tests. Can be modified before calling setupStreams to configure things. - val conf = new SparkConf() - .setMaster(master) - .setAppName(framework) - - // Default before function for any streaming test suite. Override this - // if you want to add your stuff to "before" (i.e., don't call before { } ) - def beforeFunction() { - if (useManualClock) { - logInfo("Using manual clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") - } else { - logInfo("Using real clock") - conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock") - } - } - - // Default after function for any streaming test suite. Override this - // if you want to add your stuff to "after" (i.e., don't call after { } ) - def afterFunction() { - System.clearProperty("spark.streaming.clock") - } - - before(beforeFunction) - after(afterFunction) - - /** - * Run a block of code with the given StreamingContext and automatically - * stop the context when the block completes or when an exception is thrown. - */ - def withStreamingContext[R](ssc: StreamingContext)(block: StreamingContext => R): R = { - try { - block(ssc) - } finally { - try { - ssc.stop(stopSparkContext = true) - } catch { - case e: Exception => - logError("Error stopping StreamingContext", e) - } - } - } - - /** - * Set up required DStreams to test the DStream operation using the two sequences - * of input collections. 
- */ - def setupStreams[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - numPartitions: Int = numInputPartitions - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream = new TestInputStream(ssc, input, numPartitions) - val operatedStream = operation(inputStream) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[V]]] with SynchronizedBuffer[Seq[Seq[V]]]) - outputStream.register() - ssc - } - - /** - * Set up required DStreams to test the binary operation using the sequence - * of input collections. - */ - def setupStreams[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W] - ): StreamingContext = { - // Create StreamingContext - val ssc = new StreamingContext(conf, batchDuration) - if (checkpointDir != null) { - ssc.checkpoint(checkpointDir) - } - - // Setup the stream computation - val inputStream1 = new TestInputStream(ssc, input1, numInputPartitions) - val inputStream2 = new TestInputStream(ssc, input2, numInputPartitions) - val operatedStream = operation(inputStream1, inputStream2) - val outputStream = new TestOutputStreamWithPartitions(operatedStream, - new ArrayBuffer[Seq[Seq[W]]] with SynchronizedBuffer[Seq[Seq[W]]]) - outputStream.register() - ssc - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of items for each RDD. - */ - def runStreams[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[V]] = { - // Flatten each RDD into a single Seq - runStreamsWithPartitions(ssc, numBatches, numExpectedOutput).map(_.flatten.toSeq) - } - - /** - * Runs the streams set up in `ssc` on manual clock for `numBatches` batches and - * returns the collected output. It will wait until `numExpectedOutput` number of - * output data has been collected or timeout (set by `maxWaitTimeMillis`) is reached. - * - * Returns a sequence of RDD's. Each RDD is represented as several sequences of items, each - * representing one partition. - */ - def runStreamsWithPartitions[V: ClassTag]( - ssc: StreamingContext, - numBatches: Int, - numExpectedOutput: Int - ): Seq[Seq[Seq[V]]] = { - assert(numBatches > 0, "Number of batches to run stream computation is zero") - assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") - logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) - - // Get the output buffer - val outputStream = ssc.graph.getOutputStreams. - filter(_.isInstanceOf[TestOutputStreamWithPartitions[_]]). 
- head.asInstanceOf[TestOutputStreamWithPartitions[V]] - val output = outputStream.output - - try { - // Start computation - ssc.start() - - // Advance manual clock - val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - logInfo("Manual clock before advancing = " + clock.time) - if (actuallyWait) { - for (i <- 1 to numBatches) { - logInfo("Actually waiting for " + batchDuration) - clock.addToTime(batchDuration.milliseconds) - Thread.sleep(batchDuration.milliseconds) - } - } else { - clock.addToTime(numBatches * batchDuration.milliseconds) - } - logInfo("Manual clock after advancing = " + clock.time) - - // Wait until expected number of output items have been generated - val startTime = System.currentTimeMillis() - while (output.size < numExpectedOutput && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { - logInfo("output.size = " + output.size + ", numExpectedOutput = " + numExpectedOutput) - ssc.awaitTermination(50) - } - val timeTaken = System.currentTimeMillis() - startTime - logInfo("Output generated in " + timeTaken + " milliseconds") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") - assert(output.size === numExpectedOutput, "Unexpected number of outputs generated") - - Thread.sleep(100) // Give some time for the forgetting old RDDs to complete - } finally { - ssc.stop(stopSparkContext = true) - } - output - } - - /** - * Verify whether the output values after running a DStream operation - * is same as the expected output values, by comparing the output - * collections either as lists (order matters) or sets (order does not matter) - */ - def verifyOutput[V: ClassTag]( - output: Seq[Seq[V]], - expectedOutput: Seq[Seq[V]], - useSet: Boolean - ) { - logInfo("--------------------------------") - logInfo("output.size = " + output.size) - logInfo("output") - output.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("expected output.size = " + expectedOutput.size) - logInfo("expected output") - expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("--------------------------------") - - // Match the output with the expected output - assert(output.size === expectedOutput.size, "Number of outputs do not match") - for (i <- 0 until output.size) { - if (useSet) { - assert(output(i).toSet === expectedOutput(i).toSet) - } else { - assert(output(i).toList === expectedOutput(i).toList) - } - } - logInfo("Output verified successfully") - } - - /** - * Test unary DStream operation with a list of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - useSet: Boolean = false - ) { - testOperation[U, V](input, operation, expectedOutput, -1, useSet) - } - - /** - * Test unary DStream operation with a list of inputs - * @param input Sequence of input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - expectedOutput: Seq[Seq[V]], - numBatches: Int, - useSet: Boolean 
- ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V](input, operation)) { ssc => - val output = runStreams[V](ssc, numBatches_, expectedOutput.size) - verifyOutput[V](output, expectedOutput, useSet) - } - } - - /** - * Test binary DStream operation with two lists of inputs, with number of - * batches to run same as the number of expected output values - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - useSet: Boolean - ) { - testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) - } - - /** - * Test binary DStream operation with two lists of inputs - * @param input1 First sequence of input collections - * @param input2 Second sequence of input collections - * @param operation Binary DStream operation to be applied to the 2 inputs - * @param expectedOutput Sequence of expected output collections - * @param numBatches Number of batches to run the operation for - * @param useSet Compare the output values with the expected output values - * as sets (order matters) or as lists (order does not matter) - */ - def testOperation[U: ClassTag, V: ClassTag, W: ClassTag]( - input1: Seq[Seq[U]], - input2: Seq[Seq[V]], - operation: (DStream[U], DStream[V]) => DStream[W], - expectedOutput: Seq[Seq[W]], - numBatches: Int, - useSet: Boolean - ) { - val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size - withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => - val output = runStreams[W](ssc, numBatches_, expectedOutput.size) - verifyOutput[W](output, expectedOutput, useSet) - } - } -} diff --git a/external/zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala b/external/zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala index cc10ff6ae03cd..a7566e733d891 100644 --- a/external/zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala +++ b/external/zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala @@ -20,12 +20,19 @@ package org.apache.spark.streaming.zeromq import akka.actor.SupervisorStrategy import akka.util.ByteString import akka.zeromq.Subscribe +import org.scalatest.FunSuite import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} +import org.apache.spark.streaming.{Seconds, StreamingContext} import org.apache.spark.streaming.dstream.ReceiverInputDStream -class ZeroMQStreamSuite extends TestSuiteBase { +class ZeroMQStreamSuite extends FunSuite { + + val batchDuration = Seconds(1) + + private val master: String = "local[2]" + + private val framework: String = this.getClass.getSimpleName test("zeromq input stream") { val ssc = new StreamingContext(master, framework, batchDuration) From 994d1d327455111025b4ea1ae7aad440cffb40d5 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 19 Nov 2014 14:36:04 +0530 Subject: [PATCH 3/3] Fixed failing flume tests --- .../spark/streaming/flume/FlumePollingStreamSuite.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala index ba079d84e4529..b57a1c71e35b9 100644 --- 
a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala +++ b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala @@ -52,6 +52,13 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging .setMaster("local[2]") .setAppName(this.getClass.getSimpleName) + def beforeFunction() { + logInfo("Using manual clock") + conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") + } + + before(beforeFunction()) + test("flume polling test") { testMultipleTimes(testFlumePolling) } @@ -234,4 +241,5 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging null } } + }
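In case it is useful to see the resulting test pattern in one place: after the shared TestSuiteBase copies are deleted, each external module's suite is a plain FunSuite that builds its own StreamingContext, as the rewritten MQTTStreamSuite, TwitterStreamSuite and ZeroMQStreamSuite hunks above show. The sketch below restates that pattern only; the suite name, test name, and body are illustrative placeholders, not part of this patch.

```scala
import org.scalatest.FunSuite

import org.apache.spark.streaming.{Seconds, StreamingContext}

// Hypothetical suite following the same shape as the rewritten external-module
// suites: no TestSuiteBase, the suite owns its master/framework/batchDuration
// and creates a local StreamingContext per test.
class ExampleStreamSuite extends FunSuite {

  val batchDuration = Seconds(1)

  private val master: String = "local[2]"

  private val framework: String = this.getClass.getSimpleName

  test("example input stream") {
    // Build a local StreamingContext exactly as the real suites do.
    val ssc = new StreamingContext(master, framework, batchDuration)
    // A real suite would create the module's ReceiverInputDStream here
    // and assert on the received data before stopping the context.
    ssc.stop()
  }
}
```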