From 60d10c286dbfa256ed5e5bec9d387588043efa54 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Thu, 26 May 2016 08:07:37 -0700
Subject: [PATCH 1/4] fix typos

---
 .../org/apache/spark/streaming/dstream/DStream.scala   | 2 +-
 .../org/apache/spark/streaming/receiver/Receiver.scala | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
index 01dcfcf24b0fe..bd0eaf678537e 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
@@ -52,7 +52,7 @@ import org.apache.spark.util.{CallSite, Utils}
  * `join`. These operations are automatically available on any DStream of pairs
  * (e.g., DStream[(Int, Int)] through implicit conversions.
  *
- * DStreams internally is characterized by a few basic properties:
+ * DStream internally is characterized by a few basic properties:
  *  - A list of other DStreams that the DStream depends on
  *  - A time interval at which the DStream generates an RDD
  *  - A function that is used to generate an RDD after each time interval
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
index 5157ca62dc449..0681e5e4e1527 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
@@ -32,7 +32,7 @@ import org.apache.spark.storage.StorageLevel
  * should define the setup steps necessary to start receiving data,
  * and `onStop()` should define the cleanup steps necessary to stop receiving data.
  * Exceptions while receiving can be handled either by restarting the receiver with `restart(...)`
- * or stopped completely by `stop(...)` or
+ * or stopped completely by `stop(...)`.
  *
  * A custom receiver in Scala would look like this.
  *
@@ -45,7 +45,7 @@ import org.apache.spark.storage.StorageLevel
  *      // Call store(...) in those threads to store received data into Spark's memory.
  *
  *      // Call stop(...), restart(...) or reportError(...) on any thread based on how
- *      // different errors needs to be handled.
+ *      // different errors need to be handled.
  *
  *      // See corresponding method documentation for more details
  *  }
@@ -71,7 +71,7 @@ import org.apache.spark.storage.StorageLevel
  *      // Call store(...) in those threads to store received data into Spark's memory.
  *
  *      // Call stop(...), restart(...) or reportError(...) on any thread based on how
- *      // different errors needs to be handled.
+ *      // different errors need to be handled.
  *
  *      // See corresponding method documentation for more details
  *  }
@@ -242,8 +242,7 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable
   }
 
   /**
-   * Get the unique identifier the receiver input stream that this
-   * receiver is associated with.
+   * Get the unique identifier of the receiver input stream that this receiver is associated with.
    */
   def streamId: Int = id

From 6d81172938e4282c67fc9bac27b3db3e56e86573 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Fri, 27 May 2016 00:18:09 -0700
Subject: [PATCH 2/4] more typos fix

---
 .../org/apache/spark/examples/streaming/CustomReceiver.scala | 4 ++--
 .../org/apache/spark/examples/streaming/QueueStream.scala    | 2 +-
 .../apache/spark/streaming/scheduler/ReceiverTracker.scala   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
index 1d144db9864bd..43044d01b1204 100644
--- a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
@@ -29,7 +29,7 @@ import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.receiver.Receiver
 
 /**
- * Custom Receiver that receives data over a socket. Received bytes is interpreted as
+ * Custom Receiver that receives data over a socket. Received bytes are interpreted as
  * text and \n delimited lines are considered as records. They are then counted and printed.
 *
 * To run this on your local machine, you need to first run a Netcat server
@@ -50,7 +50,7 @@ object CustomReceiver {
     val sparkConf = new SparkConf().setAppName("CustomReceiver")
     val ssc = new StreamingContext(sparkConf, Seconds(1))
 
-    // Create a input stream with the custom receiver on target ip:port and count the
+    // Create an input stream with the custom receiver on target ip:port and count the
     // words in input stream of \n delimited text (eg. generated by 'nc')
     val lines = ssc.receiverStream(new CustomReceiver(args(0), args(1).toInt))
     val words = lines.flatMap(_.split(" "))
diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala
index 5455aed22085d..19bacd449787b 100644
--- a/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala
@@ -43,7 +43,7 @@ object QueueStream {
     reducedStream.print()
     ssc.start()
 
-    // Create and push some RDDs into
+    // Create and push some RDDs into rddQueue
     for (i <- 1 to 30) {
       rddQueue.synchronized {
         rddQueue += ssc.sparkContext.makeRDD(1 to 1000, 10)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
index 9aa2f0bbb9952..b9d898a72362e 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
@@ -20,7 +20,7 @@ package org.apache.spark.streaming.scheduler
 import java.util.concurrent.{CountDownLatch, TimeUnit}
 
 import scala.collection.mutable.HashMap
-import scala.concurrent.{ExecutionContext, Future}
+import scala.concurrent.ExecutionContext
 import scala.language.existentials
 import scala.util.{Failure, Success}

From 504e8367439b55092fc202f9436178da27f43442 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Sat, 28 May 2016 16:12:33 -0700
Subject: [PATCH 3/4] more fix

---
 .../scala/org/apache/spark/rdd/ParallelCollectionRDD.scala  | 4 ++--
 .../apache/spark/streaming/receiver/BlockGenerator.scala    | 6 +++---
 .../org/apache/spark/streaming/scheduler/JobGenerator.scala | 3 +--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala
index 34a1c112cbcd0..e9092739b298a 100644
--- a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala
@@ -32,8 +32,8 @@ import org.apache.spark.util.Utils
 private[spark] class ParallelCollectionPartition[T: ClassTag](
     var rddId: Long,
     var slice: Int,
-    var values: Seq[T])
-  extends Partition with Serializable {
+    var values: Seq[T]
+  ) extends Partition with Serializable {
 
   def iterator: Iterator[T] = values.iterator
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala
index 4592e015ed9a0..90309c0145ae1 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala
@@ -86,13 +86,13 @@
   /**
    * The BlockGenerator can be in 5 possible states, in the order as follows.
    *
-   *  - Initialized: Nothing has been started
+   *  - Initialized: Nothing has been started.
    *  - Active: start() has been called, and it is generating blocks on added data.
    *  - StoppedAddingData: stop() has been called, the adding of data has been stopped,
    *                       but blocks are still being generated and pushed.
    *  - StoppedGeneratingBlocks: Generating of blocks has been stopped, but
    *                             they are still being pushed.
-   *  - StoppedAll: Everything has stopped, and the BlockGenerator object can be GCed.
+   *  - StoppedAll: Everything has been stopped, and the BlockGenerator object can be GCed.
    */
   private object GeneratorState extends Enumeration {
     type GeneratorState = Value
@@ -148,7 +148,7 @@
     blockIntervalTimer.stop(interruptTimer = false)
     synchronized { state = StoppedGeneratingBlocks }
 
-    // Wait for the queue to drain and mark generated as stopped
+    // Wait for the queue to drain and mark state as StoppedAll
     logInfo("Waiting for block pushing thread to terminate")
     blockPushingThread.join()
     synchronized { state = StoppedAll }
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala
index 8f9421fc098ba..19c88f1ee0114 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala
@@ -19,7 +19,6 @@ package org.apache.spark.streaming.scheduler
 
 import scala.util.{Failure, Success, Try}
 
-import org.apache.spark.SparkEnv
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 import org.apache.spark.streaming.{Checkpoint, CheckpointWriter, Time}
@@ -239,7 +238,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging {
     logInfo("Restarted JobGenerator at " + restartTime)
   }
 
-  /** Generate jobs and perform checkpoint for the given `time`. */
+  /** Generate jobs and perform checkpointing for the given `time`. */
   private def generateJobs(time: Time) {
     // Checkpoint all RDDs marked for checkpointing to ensure their lineages are
     // truncated periodically. Otherwise, we may run into stack overflows (SPARK-6847).
From 15d4b9f2eb9cb5a222974172f32c97b56fec3633 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Sun, 29 May 2016 15:17:26 -0700
Subject: [PATCH 4/4] minor changes

---
 .../scala/org/apache/spark/streaming/dstream/DStream.scala   | 2 +-
 .../scala/org/apache/spark/streaming/receiver/Receiver.scala | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
index bd0eaf678537e..147e8c129034b 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
@@ -52,7 +52,7 @@ import org.apache.spark.util.{CallSite, Utils}
  * `join`. These operations are automatically available on any DStream of pairs
  * (e.g., DStream[(Int, Int)] through implicit conversions.
  *
- * DStream internally is characterized by a few basic properties:
+ * A DStream internally is characterized by a few basic properties:
  *  - A list of other DStreams that the DStream depends on
  *  - A time interval at which the DStream generates an RDD
  *  - A function that is used to generate an RDD after each time interval
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
index 0681e5e4e1527..d91a64df321a6 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala
@@ -242,7 +242,8 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable
   }
 
   /**
-   * Get the unique identifier of the receiver input stream that this receiver is associated with.
+   * Get the unique identifier of the receiver input stream that this
+   * receiver is associated with.
    */
   def streamId: Int = id
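
Note: the receiver skeleton in the Receiver.scala scaladoc above is what the
CustomReceiver example touched in PATCH 2/4 implements in full. For reference,
a minimal self-contained sketch of that pattern follows; the class name
SocketLineReceiver and the simplified error handling are illustrative
assumptions, not part of the patched sources.

import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

class SocketLineReceiver(host: String, port: Int)
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) {

  // onStart() must not block: spawn the receiving thread and return immediately.
  def onStart() {
    new Thread("Socket Receiver") {
      override def run() { receive() }
    }.start()
  }

  // Nothing to clean up: the receiving thread checks isStopped() and exits on its own.
  def onStop() {}

  /** Connect to host:port and store each \n delimited line until stopped. */
  private def receive() {
    try {
      val socket = new Socket(host, port)
      val reader = new BufferedReader(
        new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
      var line = reader.readLine()
      while (!isStopped && line != null) {
        store(line)  // hand each record to Spark's memory
        line = reader.readLine()
      }
      reader.close()
      socket.close()
      // Stream ended: ask Spark to restart the receiver and reconnect.
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}

Wired up as in the example the patch touches, ssc.receiverStream(new
SocketLineReceiver(host, port)) yields a DStream[String] of received lines.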