From 60d10c286dbfa256ed5e5bec9d387588043efa54 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Thu, 26 May 2016 08:07:37 -0700
Subject: [PATCH 1/4] fix typos

---
 .../org/apache/spark/streaming/dstream/DStream.scala   | 2 +-
 .../org/apache/spark/streaming/receiver/Receiver.scala | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
index 01dcfcf24b0fe..bd0eaf678537e 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
@@ -52,7 +52,7 @@ import org.apache.spark.util.{CallSite, Utils}
  * `join`. These operations are automatically available on any DStream of pairs
  * (e.g., DStream[(Int, Int)] through implicit conversions.
  *
- * DStreams internally is characterized by a few basic properties:
+ * DStream internally is characterized by a few basic properties:
  *  - A list of other DStreams that the DStream depends on
  *  - A time interval at which the DStream generates an RDD
  *  - A function that is used to generate an RDD after each time interval
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
index 5157ca62dc449..0681e5e4e1527 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
@@ -32,7 +32,7 @@ import org.apache.spark.storage.StorageLevel
  * should define the setup steps necessary to start receiving data,
  * and `onStop()` should define the cleanup steps necessary to stop receiving data.
  * Exceptions while receiving can be handled either by restarting the receiver with `restart(...)`
- * or stopped completely by `stop(...)` or
+ * or stopped completely by `stop(...)`.
  *
  * A custom receiver in Scala would look like this.
  *
@@ -45,7 +45,7 @@ import org.apache.spark.storage.StorageLevel
  *      // Call store(...) in those threads to store received data into Spark's memory.
  *
  *      // Call stop(...), restart(...) or reportError(...) on any thread based on how
- *      // different errors needs to be handled.
+ *      // different errors need to be handled.
  *
  *      // See corresponding method documentation for more details
  *  }
@@ -71,7 +71,7 @@ import org.apache.spark.storage.StorageLevel
  *      // Call store(...) in those threads to store received data into Spark's memory.
  *
  *      // Call stop(...), restart(...) or reportError(...) on any thread based on how
- *      // different errors needs to be handled.
+ *      // different errors need to be handled.
  *
  *      // See corresponding method documentation for more details
  *  }
@@ -242,8 +242,7 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable
   }
 
   /**
-   * Get the unique identifier the receiver input stream that this
-   * receiver is associated with.
+   * Get the unique identifier of the receiver input stream that this receiver is associated with.
    */
   def streamId: Int = id

From 6d81172938e4282c67fc9bac27b3db3e56e86573 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Fri, 27 May 2016 00:18:09 -0700
Subject: [PATCH 2/4] more typos fix

---
 .../org/apache/spark/examples/streaming/CustomReceiver.scala | 4 ++--
 .../org/apache/spark/examples/streaming/QueueStream.scala    | 2 +-
 .../apache/spark/streaming/scheduler/ReceiverTracker.scala   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
index 1d144db9864bd..43044d01b1204 100644
--- a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
@@ -29,7 +29,7 @@ import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.receiver.Receiver
 
 /**
- * Custom Receiver that receives data over a socket. Received bytes is interpreted as
+ * Custom Receiver that receives data over a socket. Received bytes are interpreted as
  * text and \n delimited lines are considered as records. They are then counted and printed.
 *
 * To run this on your local machine, you need to first run a Netcat server
@@ -50,7 +50,7 @@ object CustomReceiver {
     val sparkConf = new SparkConf().setAppName("CustomReceiver")
     val ssc = new StreamingContext(sparkConf, Seconds(1))
 
-    // Create a input stream with the custom receiver on target ip:port and count the
+    // Create an input stream with the custom receiver on target ip:port and count the
     // words in input stream of \n delimited text (eg. generated by 'nc')
     val lines = ssc.receiverStream(new CustomReceiver(args(0), args(1).toInt))
     val words = lines.flatMap(_.split(" "))
diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala
index 5455aed22085d..19bacd449787b 100644
--- a/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala
@@ -43,7 +43,7 @@ object QueueStream {
     reducedStream.print()
     ssc.start()
 
-    // Create and push some RDDs into
+    // Create and push some RDDs into rddQueue
     for (i <- 1 to 30) {
       rddQueue.synchronized {
         rddQueue += ssc.sparkContext.makeRDD(1 to 1000, 10)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
index 9aa2f0bbb9952..b9d898a72362e 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
@@ -20,7 +20,7 @@ package org.apache.spark.streaming.scheduler
 import java.util.concurrent.{CountDownLatch, TimeUnit}
 
 import scala.collection.mutable.HashMap
-import scala.concurrent.{ExecutionContext, Future}
+import scala.concurrent.ExecutionContext
 import scala.language.existentials
 import scala.util.{Failure, Success}

From 504e8367439b55092fc202f9436178da27f43442 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Sat, 28 May 2016 16:12:33 -0700
Subject: [PATCH 3/4] more fix

---
 .../scala/org/apache/spark/rdd/ParallelCollectionRDD.scala  | 4 ++--
 .../apache/spark/streaming/receiver/BlockGenerator.scala    | 6 +++---
 .../org/apache/spark/streaming/scheduler/JobGenerator.scala | 3 +--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala
index 34a1c112cbcd0..e9092739b298a 100644
--- a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala
@@ -32,8 +32,8 @@ import org.apache.spark.util.Utils
 private[spark] class ParallelCollectionPartition[T: ClassTag](
     var rddId: Long,
     var slice: Int,
-    var values: Seq[T])
-  extends Partition with Serializable {
+    var values: Seq[T]
+  ) extends Partition with Serializable {
 
   def iterator: Iterator[T] = values.iterator
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala
index 4592e015ed9a0..90309c0145ae1 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala
@@ -86,13 +86,13 @@
   /**
    * The BlockGenerator can be in 5 possible states, in the order as follows.
    *
-   *  - Initialized: Nothing has been started
+   *  - Initialized: Nothing has been started.
    *  - Active: start() has been called, and it is generating blocks on added data.
    *  - StoppedAddingData: stop() has been called, the adding of data has been stopped,
    *                       but blocks are still being generated and pushed.
    *  - StoppedGeneratingBlocks: Generating of blocks has been stopped, but
    *                             they are still being pushed.
-   *  - StoppedAll: Everything has stopped, and the BlockGenerator object can be GCed.
+   *  - StoppedAll: Everything has been stopped, and the BlockGenerator object can be GCed.
    */
   private object GeneratorState extends Enumeration {
     type GeneratorState = Value
@@ -148,7 +148,7 @@
     blockIntervalTimer.stop(interruptTimer = false)
     synchronized { state = StoppedGeneratingBlocks }
 
-    // Wait for the queue to drain and mark generated as stopped
+    // Wait for the queue to drain and mark state as StoppedAll
     logInfo("Waiting for block pushing thread to terminate")
     blockPushingThread.join()
     synchronized { state = StoppedAll }
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala
index 8f9421fc098ba..19c88f1ee0114 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala
@@ -19,7 +19,6 @@ package org.apache.spark.streaming.scheduler
 
 import scala.util.{Failure, Success, Try}
 
-import org.apache.spark.SparkEnv
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 import org.apache.spark.streaming.{Checkpoint, CheckpointWriter, Time}
@@ -239,7 +238,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging {
     logInfo("Restarted JobGenerator at " + restartTime)
   }
 
-  /** Generate jobs and perform checkpoint for the given `time`. */
+  /** Generate jobs and perform checkpointing for the given `time`. */
   private def generateJobs(time: Time) {
     // Checkpoint all RDDs marked for checkpointing to ensure their lineages are
     // truncated periodically. Otherwise, we may run into stack overflows (SPARK-6847).
From 15d4b9f2eb9cb5a222974172f32c97b56fec3633 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Sun, 29 May 2016 15:17:26 -0700
Subject: [PATCH 4/4] minor changes

---
 .../scala/org/apache/spark/streaming/dstream/DStream.scala   | 2 +-
 .../scala/org/apache/spark/streaming/receiver/Receiver.scala | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
index bd0eaf678537e..147e8c129034b 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
@@ -52,7 +52,7 @@ import org.apache.spark.util.{CallSite, Utils}
  * `join`. These operations are automatically available on any DStream of pairs
  * (e.g., DStream[(Int, Int)] through implicit conversions.
  *
- * DStream internally is characterized by a few basic properties:
+ * A DStream internally is characterized by a few basic properties:
  *  - A list of other DStreams that the DStream depends on
  *  - A time interval at which the DStream generates an RDD
  *  - A function that is used to generate an RDD after each time interval
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
index 0681e5e4e1527..d91a64df321a6 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
@@ -242,7 +242,8 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable
   }
 
   /**
-   * Get the unique identifier of the receiver input stream that this receiver is associated with.
+   * Get the unique identifier of the receiver input stream that this
+   * receiver is associated with.
    */
   def streamId: Int = id
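
Note: the receiver skeleton in the Receiver.scala scaladoc above is what the
CustomReceiver example touched in PATCH 2/4 implements in full. For reference,
a minimal self-contained sketch of that pattern follows; the class name
SocketLineReceiver and the simplified error handling are illustrative
assumptions, not part of the patched sources.

import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

class SocketLineReceiver(host: String, port: Int)
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) {

  // onStart() must not block: spawn the receiving thread and return immediately.
  def onStart() {
    new Thread("Socket Receiver") {
      override def run() { receive() }
    }.start()
  }

  // Nothing to clean up: the receiving thread checks isStopped() and exits on its own.
  def onStop() {}

  /** Connect to host:port and store each \n delimited line until stopped. */
  private def receive() {
    try {
      val socket = new Socket(host, port)
      val reader = new BufferedReader(
        new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
      var line = reader.readLine()
      while (!isStopped && line != null) {
        store(line)  // hand each record to Spark's memory
        line = reader.readLine()
      }
      reader.close()
      socket.close()
      // Stream ended: ask Spark to restart the receiver and reconnect.
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}

Wired up as in the example the patch touches, ssc.receiverStream(new
SocketLineReceiver(host, port)) yields a DStream[String] of received lines.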