From b1ad7713787da496825ddcab4bc5c425cfb85aff Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Thu, 7 Dec 2017 14:08:28 -0800 Subject: [PATCH 01/50] Refactor StreamExecution into a parent class so continuous processing can extend it --- .../execution/streaming/MicroBatchExecution.scala | 14 ++++++++++++++ .../sql/execution/streaming/StreamExecution.scala | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 4a3de8bae4bc9..8ba5d3be0d077 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -97,7 +97,11 @@ class MicroBatchExecution( finishTrigger(dataAvailable) if (dataAvailable) { // Update committed offsets. +<<<<<<< HEAD commitLog.add(currentBatchId) +======= + batchCommitLog.add(currentBatchId) +>>>>>>> Refactor StreamExecution into a parent class so continuous processing can extend it committedOffsets ++= availableOffsets logDebug(s"batch ${currentBatchId} committed") // We'll increase currentBatchId after we complete processing current batch's data @@ -162,7 +166,11 @@ class MicroBatchExecution( /* identify the current batch id: if commit log indicates we successfully processed the * latest batch id in the offset log, then we can safely move to the next batch * i.e., committedBatchId + 1 */ +<<<<<<< HEAD commitLog.getLatest() match { +======= + batchCommitLog.getLatest() match { +>>>>>>> Refactor StreamExecution into a parent class so continuous processing can extend it case Some((latestCommittedBatchId, _)) => if (latestBatchId == latestCommittedBatchId) { /* The last batch was successfully committed, so we can safely process a @@ -307,9 +315,15 @@ class MicroBatchExecution( // It is now safe to discard the metadata beyond the minimum number to retain. // Note that purge is exclusive, i.e. it purges everything before the target ID. 
+<<<<<<< HEAD if (minLogEntriesToMaintain < currentBatchId) { offsetLog.purge(currentBatchId - minLogEntriesToMaintain) commitLog.purge(currentBatchId - minLogEntriesToMaintain) +======= + if (minBatchesToRetain < currentBatchId) { + offsetLog.purge(currentBatchId - minBatchesToRetain) + batchCommitLog.purge(currentBatchId - minBatchesToRetain) +>>>>>>> Refactor StreamExecution into a parent class so continuous processing can extend it } } } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 129995dcf3607..2f3ceafdddac0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -83,7 +83,7 @@ abstract class StreamExecution( private val startLatch = new CountDownLatch(1) private val terminationLatch = new CountDownLatch(1) - val resolvedCheckpointRoot = { + val resolvedCheckpointRoot: String = { val checkpointPath = new Path(checkpointRoot) val fs = checkpointPath.getFileSystem(sparkSession.sessionState.newHadoopConf()) checkpointPath.makeQualified(fs.getUri, fs.getWorkingDirectory).toUri.toString From 4058c65525bb18c5cb6cef15561b2691068e3738 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Tue, 12 Dec 2017 11:31:28 -0800 Subject: [PATCH 02/50] address fmt --- .../apache/spark/sql/execution/streaming/StreamExecution.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 2f3ceafdddac0..129995dcf3607 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -83,7 +83,7 @@ abstract class StreamExecution( private val startLatch = new CountDownLatch(1) private val terminationLatch = new CountDownLatch(1) - val resolvedCheckpointRoot: String = { + val resolvedCheckpointRoot = { val checkpointPath = new Path(checkpointRoot) val fs = checkpointPath.getFileSystem(sparkSession.sessionState.newHadoopConf()) checkpointPath.makeQualified(fs.getUri, fs.getWorkingDirectory).toUri.toString From 2af39200f885cdd506950cd8e4c3bbc914539dc8 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Tue, 12 Dec 2017 16:09:48 -0800 Subject: [PATCH 03/50] slight changes --- .../streaming/MicroBatchExecution.scala | 18 ++++-------------- .../execution/streaming/StreamExecution.scala | 2 +- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 8ba5d3be0d077..6743ae29c37a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -41,6 +41,10 @@ class MicroBatchExecution( sparkSession, name, checkpointRoot, analyzedPlan, sink, trigger, triggerClock, outputMode, deleteCheckpointOnStop) { + override val offsetLog = new OffsetSeqLog(sparkSession, checkpointFile("offsets")) + override val batchCommitLog = new BatchCommitLog(sparkSession, checkpointFile("commits")) + + private val 
triggerExecutor = trigger match { case t: ProcessingTime => ProcessingTimeExecutor(t, triggerClock) case OneTimeTrigger => OneTimeExecutor() @@ -97,11 +101,7 @@ class MicroBatchExecution( finishTrigger(dataAvailable) if (dataAvailable) { // Update committed offsets. -<<<<<<< HEAD commitLog.add(currentBatchId) -======= - batchCommitLog.add(currentBatchId) ->>>>>>> Refactor StreamExecution into a parent class so continuous processing can extend it committedOffsets ++= availableOffsets logDebug(s"batch ${currentBatchId} committed") // We'll increase currentBatchId after we complete processing current batch's data @@ -166,11 +166,7 @@ class MicroBatchExecution( /* identify the current batch id: if commit log indicates we successfully processed the * latest batch id in the offset log, then we can safely move to the next batch * i.e., committedBatchId + 1 */ -<<<<<<< HEAD commitLog.getLatest() match { -======= - batchCommitLog.getLatest() match { ->>>>>>> Refactor StreamExecution into a parent class so continuous processing can extend it case Some((latestCommittedBatchId, _)) => if (latestBatchId == latestCommittedBatchId) { /* The last batch was successfully committed, so we can safely process a @@ -315,15 +311,9 @@ class MicroBatchExecution( // It is now safe to discard the metadata beyond the minimum number to retain. // Note that purge is exclusive, i.e. it purges everything before the target ID. -<<<<<<< HEAD if (minLogEntriesToMaintain < currentBatchId) { offsetLog.purge(currentBatchId - minLogEntriesToMaintain) commitLog.purge(currentBatchId - minLogEntriesToMaintain) -======= - if (minBatchesToRetain < currentBatchId) { - offsetLog.purge(currentBatchId - minBatchesToRetain) - batchCommitLog.purge(currentBatchId - minBatchesToRetain) ->>>>>>> Refactor StreamExecution into a parent class so continuous processing can extend it } } } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 129995dcf3607..7e1b5deb58905 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -200,7 +200,7 @@ abstract class StreamExecution( * processing is done. Thus, the Nth record in this log indicated data that is currently being * processed and the N-1th entry indicates which offsets have been durably committed to the sink. */ - val offsetLog = new OffsetSeqLog(sparkSession, checkpointFile("offsets")) + abstract def offsetLog /** * A log that records the batch ids that have completed. 
This is used to check if a batch was From cd3d28ae7d896860da83a500eaedc96cf2e81b54 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Tue, 12 Dec 2017 16:10:34 -0800 Subject: [PATCH 04/50] rm spurious space --- .../spark/sql/execution/streaming/MicroBatchExecution.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 6743ae29c37a2..7f3ec7092849e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -44,7 +44,6 @@ class MicroBatchExecution( override val offsetLog = new OffsetSeqLog(sparkSession, checkpointFile("offsets")) override val batchCommitLog = new BatchCommitLog(sparkSession, checkpointFile("commits")) - private val triggerExecutor = trigger match { case t: ProcessingTime => ProcessingTimeExecutor(t, triggerClock) case OneTimeTrigger => OneTimeExecutor() From fdc404d526d429c71e4a90764b2e8a19d903996a Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Tue, 12 Dec 2017 16:35:30 -0800 Subject: [PATCH 05/50] fix compile --- .../apache/spark/sql/execution/streaming/StreamExecution.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 7e1b5deb58905..ee74300364d2e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -200,7 +200,7 @@ abstract class StreamExecution( * processing is done. Thus, the Nth record in this log indicated data that is currently being * processed and the N-1th entry indicates which offsets have been durably committed to the sink. */ - abstract def offsetLog + def offsetLog: OffsetSeqLog /** * A log that records the batch ids that have completed. 
This is used to check if a batch was From 7d97c225ec6914a329156c9045aacc7bccf0a470 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 12:48:20 -0800 Subject: [PATCH 06/50] harness --- .../apache/spark/sql/streaming/Trigger.java | 54 +++ .../spark/sql/execution/SparkStrategies.scala | 6 + .../datasources/v2/DataSourceV2ScanExec.scala | 20 +- .../datasources/v2/WriteToDataSourceV2.scala | 56 ++- .../streaming/MicroBatchExecution.scala | 41 ++- .../sql/execution/streaming/OffsetSeq.scala | 2 +- .../streaming/ProgressReporter.scala | 10 +- .../streaming/RateSourceProvider.scala | 9 +- .../spark/sql/execution/streaming/Sink.scala | 2 +- .../sql/execution/streaming/Source.scala | 2 +- .../execution/streaming/StreamExecution.scala | 20 +- .../execution/streaming/StreamProgress.scala | 19 +- .../streaming/StreamingRelation.scala | 39 +++ .../ContinuousDataSourceRDDIter.scala | 164 +++++++++ .../continuous/ContinuousExecution.scala | 321 ++++++++++++++++++ .../ContinuousRateStreamSource.scala | 4 + .../continuous/ContinuousTrigger.scala | 70 ++++ .../continuous/EpochCoordinator.scala | 166 +++++++++ .../streaming/sources/memoryV2.scala | 13 + .../sql/streaming/DataStreamReader.scala | 39 ++- .../sql/streaming/DataStreamWriter.scala | 18 +- .../sql/streaming/StreamingQueryManager.scala | 44 ++- .../spark/sql/streaming/StreamSuite.scala | 2 +- .../spark/sql/streaming/StreamTest.scala | 26 +- .../continuous/ContinuousSuite.scala | 123 +++++++ .../spark/sql/test/TestSQLContext.scala | 2 +- 26 files changed, 1185 insertions(+), 87 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTrigger.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala diff --git a/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java b/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java index d31790a285687..cf4f0c6899ffc 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java +++ b/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java @@ -19,6 +19,7 @@ import java.util.concurrent.TimeUnit; +import org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger; import scala.concurrent.duration.Duration; import org.apache.spark.annotation.InterfaceStability; @@ -95,4 +96,57 @@ public static Trigger ProcessingTime(String interval) { public static Trigger Once() { return OneTimeTrigger$.MODULE$; } + + /** + * A trigger that continuously processes streaming data, asynchronously checkpointing at + * the specified interval. + * + * @since 2.3.0 + */ + public static Trigger Continuous(long intervalMs) { + return ContinuousTrigger.apply(intervalMs); + } + + /** + * A trigger that continuously processes streaming data, asynchronously checkpointing at + * the specified interval. 
+ * + * {{{ + * import java.util.concurrent.TimeUnit + * df.writeStream.trigger(ProcessingTime.create(10, TimeUnit.SECONDS)) + * }}} + * + * @since 2.3.0 + */ + public static Trigger Continuous(long interval, TimeUnit timeUnit) { + return ContinuousTrigger.create(interval, timeUnit); + } + + /** + * (Scala-friendly) + * A trigger that continuously processes streaming data, asynchronously checkpointing at + * the specified interval. + * + * {{{ + * import scala.concurrent.duration._ + * df.writeStream.trigger(Trigger.Continuous(10.seconds)) + * }}} + * @since 2.2.0 + */ + public static Trigger Continuous(Duration interval) { + return ContinuousTrigger.apply(interval); + } + + /** + * A trigger that continuously processes streaming data, asynchronously checkpointing at + * the specified interval. + * + * {{{ + * df.writeStream.trigger(Trigger.Continuous("10 seconds")) + * }}} + * @since 2.2.0 + */ + public static Trigger Continuous(String interval) { + return ContinuousTrigger.apply(interval); + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 9e713cd7bbe2b..4bfd13610b6d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.StreamingQuery +import org.apache.spark.sql.types.StructType /** * Converts a logical plan into zero or more SparkPlans. This API is exposed for experimenting @@ -374,6 +375,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { StreamingRelationExec(s.sourceName, s.output) :: Nil case s: StreamingExecutionRelation => StreamingRelationExec(s.toString, s.output) :: Nil + case s: StreamingRelationV2 => + StreamingRelationExec(s.sourceName, s.output) :: Nil case _ => Nil } } @@ -404,6 +407,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case MemoryPlan(sink, output) => val encoder = RowEncoder(sink.schema) LocalTableScanExec(output, sink.allData.map(r => encoder.toRow(r).copy())) :: Nil + case MemoryPlanV2(sink, output) => + val encoder = RowEncoder(StructType.fromAttributes(output)) + LocalTableScanExec(output, sink.allData.map(r => encoder.toRow(r).copy())) :: Nil case logical.Distinct(child) => throw new IllegalStateException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala index 3f243dc44e043..52733ad104efe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala @@ -26,6 +26,8 @@ import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.LeafExecNode import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.execution.streaming.StreamExecution +import org.apache.spark.sql.execution.streaming.continuous.{ContinuousDataSourceRDD, EpochCoordinatorRef, SetReaderPartitions} import org.apache.spark.sql.sources.v2.reader._ 
import org.apache.spark.sql.types.StructType @@ -52,10 +54,20 @@ case class DataSourceV2ScanExec( }.asJava } - val inputRDD = new DataSourceRDD(sparkContext, readTasks) - .asInstanceOf[RDD[InternalRow]] + val inputRDD = reader match { + case _: ContinuousReader => + EpochCoordinatorRef.get( + sparkContext.getLocalProperty(StreamExecution.QUERY_ID_KEY), sparkContext.env) + .askSync[Unit](SetReaderPartitions(readTasks.size())) + + new ContinuousDataSourceRDD(sparkContext, readTasks) + + case _ => + new DataSourceRDD(sparkContext, readTasks) + } + val numOutputRows = longMetric("numOutputRows") - inputRDD.map { r => + inputRDD.asInstanceOf[RDD[InternalRow]].map { r => numOutputRows += 1 r } @@ -73,7 +85,7 @@ class RowToUnsafeRowReadTask(rowReadTask: ReadTask[Row], schema: StructType) } } -class RowToUnsafeDataReader(rowReader: DataReader[Row], encoder: ExpressionEncoder[Row]) +class RowToUnsafeDataReader(val rowReader: DataReader[Row], encoder: ExpressionEncoder[Row]) extends DataReader[UnsafeRow] { override def next: Boolean = rowReader.next diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala index b72d15ed15aed..2474f7d45308b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.{SparkException, TaskContext} +import org.apache.spark.{SparkEnv, SparkException, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row @@ -26,6 +26,8 @@ import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.streaming.StreamExecution +import org.apache.spark.sql.execution.streaming.continuous.{CommitPartitionEpoch, ContinuousExecution, EpochCoordinatorRef, SetWriterPartitions} import org.apache.spark.sql.sources.v2.writer._ import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils @@ -58,10 +60,22 @@ case class WriteToDataSourceV2Exec(writer: DataSourceV2Writer, query: SparkPlan) s"The input RDD has ${messages.length} partitions.") try { + val runTask = writer match { + case w: ContinuousWriter => + EpochCoordinatorRef.get( + sparkContext.getLocalProperty(StreamExecution.QUERY_ID_KEY), sparkContext.env) + .askSync[Unit](SetWriterPartitions(rdd.getNumPartitions)) + + (context: TaskContext, iter: Iterator[InternalRow]) => + DataWritingSparkTask.runContinuous(writeTask, context, iter) + case _ => + (context: TaskContext, iter: Iterator[InternalRow]) => + DataWritingSparkTask.run(writeTask, context, iter) + } + sparkContext.runJob( rdd, - (context: TaskContext, iter: Iterator[InternalRow]) => - DataWritingSparkTask.run(writeTask, context, iter), + runTask, rdd.partitions.indices, (index, message: WriterCommitMessage) => messages(index) = message ) @@ -109,6 +123,42 @@ object DataWritingSparkTask extends Logging { logError(s"Writer for partition ${context.partitionId()} aborted.") }) } + + def runContinuous( + writeTask: DataWriterFactory[InternalRow], + context: TaskContext, + iter: Iterator[InternalRow]): 
WriterCommitMessage = { + val dataWriter = writeTask.createDataWriter(context.partitionId(), context.attemptNumber()) + val queryId = context.getLocalProperty(StreamExecution.QUERY_ID_KEY) + val currentMsg: WriterCommitMessage = null + var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong + + do { + // write the data and commit this writer. + Utils.tryWithSafeFinallyAndFailureCallbacks(block = { + try { + iter.foreach(dataWriter.write) + logInfo(s"Writer for partition ${context.partitionId()} is committing.") + val msg = dataWriter.commit() + logInfo(s"Writer for partition ${context.partitionId()} committed.") + EpochCoordinatorRef.get(queryId, SparkEnv.get).send( + CommitPartitionEpoch(context.partitionId(), currentEpoch, msg) + ) + currentEpoch += 1 + } catch { + case _: InterruptedException => + // Continuous shutdown always involves an interrupt. Just finish the task. + } + })(catchBlock = { + // If there is an error, abort this writer + logError(s"Writer for partition ${context.partitionId()} is aborting.") + dataWriter.abort() + logError(s"Writer for partition ${context.partitionId()} aborted.") + }) + } while (!context.isInterrupted()) + + currentMsg + } } class InternalRowDataWriterFactory( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 7f3ec7092849e..09c6f64ca5a2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.sources.v2.MicroBatchReadSupport import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime, Trigger} import org.apache.spark.util.{Clock, Utils} @@ -43,6 +44,7 @@ class MicroBatchExecution( override val offsetLog = new OffsetSeqLog(sparkSession, checkpointFile("offsets")) override val batchCommitLog = new BatchCommitLog(sparkSession, checkpointFile("commits")) + @volatile protected var sources: Seq[BaseStreamingSource] = Seq.empty private val triggerExecutor = trigger match { case t: ProcessingTime => ProcessingTimeExecutor(t, triggerClock) @@ -56,6 +58,7 @@ class MicroBatchExecution( s"but the current thread was ${Thread.currentThread}") var nextSourceId = 0L val toExecutionRelationMap = MutableMap[StreamingRelation, StreamingExecutionRelation]() + val v2ToExecutionRelationMap = MutableMap[StreamingRelationV2, StreamingExecutionRelation]() val _logicalPlan = analyzedPlan.transform { case streamingRelation@StreamingRelation(dataSource, _, output) => toExecutionRelationMap.getOrElseUpdate(streamingRelation, { @@ -67,6 +70,17 @@ class MicroBatchExecution( // "df.logicalPlan" has already used attributes of the previous `output`. 
StreamingExecutionRelation(source, output)(sparkSession) }) + case s @ StreamingRelationV2(v2DataSource, _, _, output, v1DataSource) + if !v2DataSource.isInstanceOf[MicroBatchReadSupport] => + v2ToExecutionRelationMap.getOrElseUpdate(s, { + // Materialize source to avoid creating it in every batch + val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" + val source = v1DataSource.createSource(metadataPath) + nextSourceId += 1 + // We still need to use the previous `output` instead of `source.schema` as attributes in + // "df.logicalPlan" has already used attributes of the previous `output`. + StreamingExecutionRelation(source, output)(sparkSession) + }) } sources = _logicalPlan.collect { case s: StreamingExecutionRelation => s.source } uniqueSources = sources.distinct @@ -173,12 +187,14 @@ class MicroBatchExecution( * Make a call to getBatch using the offsets from previous batch. * because certain sources (e.g., KafkaSource) assume on restart the last * batch will be executed before getOffset is called again. */ - availableOffsets.foreach { ao: (Source, Offset) => - val (source, end) = ao - if (committedOffsets.get(source).map(_ != end).getOrElse(true)) { - val start = committedOffsets.get(source) - source.getBatch(start, end) - } + availableOffsets.foreach { + case (source: Source, end: Offset) => + if (committedOffsets.get(source).map(_ != end).getOrElse(true)) { + val start = committedOffsets.get(source) + source.getBatch(start, end) + } + case nonV1Tuple => + throw new IllegalStateException(s"Unexpected V2 source in $nonV1Tuple") } currentBatchId = latestCommittedBatchId + 1 committedOffsets ++= availableOffsets @@ -222,11 +238,12 @@ class MicroBatchExecution( val hasNewData = { awaitProgressLock.lock() try { - val latestOffsets: Map[Source, Option[Offset]] = uniqueSources.map { s => - updateStatusMessage(s"Getting offsets from $s") - reportTimeTaken("getOffset") { - (s, s.getOffset) - } + val latestOffsets: Map[Source, Option[Offset]] = uniqueSources.map { + case s: Source => + updateStatusMessage(s"Getting offsets from $s") + reportTimeTaken("getOffset") { + (s, s.getOffset) + } }.toMap availableOffsets ++= latestOffsets.filter { case (s, o) => o.nonEmpty }.mapValues(_.get) @@ -334,7 +351,7 @@ class MicroBatchExecution( // Request unprocessed data from all sources. newData = reportTimeTaken("getBatch") { availableOffsets.flatMap { - case (source, available) + case (source: Source, available) if committedOffsets.get(source).map(_ != available).getOrElse(true) => val current = committedOffsets.get(source) val batch = source.getBatch(current, available) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala index 4e0a468b962a2..a1b63a6de3823 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala @@ -38,7 +38,7 @@ case class OffsetSeq(offsets: Seq[Option[Offset]], metadata: Option[OffsetSeqMet * This method is typically used to associate a serialized offset with actual sources (which * cannot be serialized). 
*/ - def toStreamProgress(sources: Seq[Source]): StreamProgress = { + def toStreamProgress(sources: Seq[BaseStreamingSource]): StreamProgress = { assert(sources.size == offsets.size) new StreamProgress ++ sources.zip(offsets).collect { case (s, Some(o)) => (s, o) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index b1c3a8ab235ab..1c9043613cb69 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -42,7 +42,7 @@ import org.apache.spark.util.Clock trait ProgressReporter extends Logging { case class ExecutionStats( - inputRows: Map[Source, Long], + inputRows: Map[BaseStreamingSource, Long], stateOperators: Seq[StateOperatorProgress], eventTimeStats: Map[String, String]) @@ -53,11 +53,11 @@ trait ProgressReporter extends Logging { protected def triggerClock: Clock protected def logicalPlan: LogicalPlan protected def lastExecution: QueryExecution - protected def newData: Map[Source, DataFrame] + protected def newData: Map[BaseStreamingSource, DataFrame] protected def availableOffsets: StreamProgress protected def committedOffsets: StreamProgress - protected def sources: Seq[Source] - protected def sink: Sink + protected def sources: Seq[BaseStreamingSource] + protected def sink: BaseStreamingSink protected def offsetSeqMetadata: OffsetSeqMetadata protected def currentBatchId: Long protected def sparkSession: SparkSession @@ -230,7 +230,7 @@ trait ProgressReporter extends Logging { } val allLogicalPlanLeaves = lastExecution.logical.collectLeaves() // includes non-streaming val allExecPlanLeaves = lastExecution.executedPlan.collectLeaves() - val numInputRows: Map[Source, Long] = + val numInputRows: Map[BaseStreamingSource, Long] = if (allLogicalPlanLeaves.size == allExecPlanLeaves.size) { val execLeafToSource = allLogicalPlanLeaves.zip(allExecPlanLeaves).flatMap { case (lp, ep) => logicalPlanLeafToSource.get(lp).map { source => ep -> source } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala index 41761324cf6ac..3f85fa913f28c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala @@ -52,7 +52,7 @@ import org.apache.spark.util.{ManualClock, SystemClock} * be resource constrained, and `numPartitions` can be tweaked to help reach the desired speed. 
*/ class RateSourceProvider extends StreamSourceProvider with DataSourceRegister - with DataSourceV2 with MicroBatchReadSupport with ContinuousReadSupport{ + with DataSourceV2 with ContinuousReadSupport { override def sourceSchema( sqlContext: SQLContext, @@ -107,13 +107,6 @@ class RateSourceProvider extends StreamSourceProvider with DataSourceRegister ) } - override def createMicroBatchReader( - schema: Optional[StructType], - checkpointLocation: String, - options: DataSourceV2Options): MicroBatchReader = { - new RateStreamV2Reader(options) - } - override def createContinuousReader( schema: Optional[StructType], checkpointLocation: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala index d10cd3044ecdf..34bc085d920c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.DataFrame * exactly once semantics a sink must be idempotent in the face of multiple attempts to add the same * batch. */ -trait Sink { +trait Sink extends BaseStreamingSink { /** * Adds a batch of data to this sink. The data for a given `batchId` is deterministic and if diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala index 311942f6dbd84..dbbd59e06909c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructType * monotonically increasing notion of progress that can be represented as an [[Offset]]. Spark * will regularly query each [[Source]] to see if any more data is available. */ -trait Source { +trait Source extends BaseStreamingSource { /** Returns the schema of the data from this source */ def schema: StructType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index ee74300364d2e..9162398650484 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -44,6 +44,7 @@ trait State case object INITIALIZING extends State case object ACTIVE extends State case object TERMINATED extends State +case object RECONFIGURING extends State /** * Manages the execution of a streaming Spark SQL query that is occurring in a separate thread. @@ -59,7 +60,7 @@ abstract class StreamExecution( override val name: String, private val checkpointRoot: String, analyzedPlan: LogicalPlan, - val sink: Sink, + val sink: BaseStreamingSink, val trigger: Trigger, val triggerClock: Clock, val outputMode: OutputMode, @@ -150,27 +151,22 @@ abstract class StreamExecution( private val prettyIdString = Option(name).map(_ + " ").getOrElse("") + s"[id = $id, runId = $runId]" - /** - * All stream sources present in the query plan. This will be set when generating logical plan. - */ - @volatile protected var sources: Seq[Source] = Seq.empty - /** * A list of unique sources in the query plan. This will be set when generating logical plan. 
*/ - @volatile protected var uniqueSources: Seq[Source] = Seq.empty + @volatile protected var uniqueSources: Seq[BaseStreamingSource] = Seq.empty /** Defines the internal state of execution */ - private val state = new AtomicReference[State](INITIALIZING) + protected val state = new AtomicReference[State](INITIALIZING) @volatile var lastExecution: IncrementalExecution = _ /** Holds the most recent input data for each source. */ - protected var newData: Map[Source, DataFrame] = _ + protected var newData: Map[BaseStreamingSource, DataFrame] = _ @volatile - private var streamDeathCause: StreamingQueryException = null + protected var streamDeathCause: StreamingQueryException = null /* Get the call site in the caller thread; will pass this into the micro batch thread */ private val callSite = Utils.getCallSite() @@ -302,7 +298,7 @@ abstract class StreamExecution( e, committedOffsets.toOffsetSeq(sources, offsetSeqMetadata).toString, availableOffsets.toOffsetSeq(sources, offsetSeqMetadata).toString) - logError(s"Query $prettyIdString terminated with error", e) + // logError(s"Query $prettyIdString terminated with error", e) updateStatusMessage(s"Terminated with exception: ${e.getMessage}") // Rethrow the fatal errors to allow the user using `Thread.UncaughtExceptionHandler` to // handle them @@ -389,7 +385,7 @@ abstract class StreamExecution( } /** Stops all streaming sources safely. */ - private def stopSources(): Unit = { + protected def stopSources(): Unit = { uniqueSources.foreach { source => try { source.stop() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamProgress.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamProgress.scala index a3f3662e6f4c9..8531070b1bc49 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamProgress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamProgress.scala @@ -23,25 +23,28 @@ import scala.collection.{immutable, GenTraversableOnce} * A helper class that looks like a Map[Source, Offset]. 
*/ class StreamProgress( - val baseMap: immutable.Map[Source, Offset] = new immutable.HashMap[Source, Offset]) - extends scala.collection.immutable.Map[Source, Offset] { + val baseMap: immutable.Map[BaseStreamingSource, Offset] = + new immutable.HashMap[BaseStreamingSource, Offset]) + extends scala.collection.immutable.Map[BaseStreamingSource, Offset] { - def toOffsetSeq(source: Seq[Source], metadata: OffsetSeqMetadata): OffsetSeq = { + def toOffsetSeq(source: Seq[BaseStreamingSource], metadata: OffsetSeqMetadata): OffsetSeq = { OffsetSeq(source.map(get), Some(metadata)) } override def toString: String = baseMap.map { case (k, v) => s"$k: $v"}.mkString("{", ",", "}") - override def +[B1 >: Offset](kv: (Source, B1)): Map[Source, B1] = baseMap + kv + override def +[B1 >: Offset](kv: (BaseStreamingSource, B1)): Map[BaseStreamingSource, B1] = { + baseMap + kv + } - override def get(key: Source): Option[Offset] = baseMap.get(key) + override def get(key: BaseStreamingSource): Option[Offset] = baseMap.get(key) - override def iterator: Iterator[(Source, Offset)] = baseMap.iterator + override def iterator: Iterator[(BaseStreamingSource, Offset)] = baseMap.iterator - override def -(key: Source): Map[Source, Offset] = baseMap - key + override def -(key: BaseStreamingSource): Map[BaseStreamingSource, Offset] = baseMap - key - def ++(updates: GenTraversableOnce[(Source, Offset)]): StreamProgress = { + def ++(updates: GenTraversableOnce[(BaseStreamingSource, Offset)]): StreamProgress = { new StreamProgress(baseMap ++ updates) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala index 6b82c78ea653d..69d2dd061e5bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LeafNode import org.apache.spark.sql.catalyst.plans.logical.Statistics import org.apache.spark.sql.execution.LeafExecNode import org.apache.spark.sql.execution.datasources.DataSource +import org.apache.spark.sql.sources.v2.{ContinuousReadSupport, DataSourceV2} object StreamingRelation { def apply(dataSource: DataSource): StreamingRelation = { @@ -75,6 +76,44 @@ case class StreamingExecutionRelation( ) } +// We have to pack in the V1 data source as a shim, for the case when a source implements +// continuous processing (which is always V2) but only has V1 microbatch support. We don't +// know at read time whether the query is conntinuous or not, so we need to be able to +// swap a V1 relation back in. +case class StreamingRelationV2( + dataSource: DataSourceV2, + sourceName: String, + extraOptions: Map[String, String], + output: Seq[Attribute], + v1DataSource: DataSource) + extends LeafNode { + override def isStreaming: Boolean = true + override def toString: String = sourceName + + // TODO: can we get the conf here somehow? + override def computeStats(): Statistics = Statistics( + sizeInBytes = 0 + ) +} + +case class ContinuousExecutionRelation( + source: ContinuousReadSupport, + extraOptions: Map[String, String], + output: Seq[Attribute])(session: SparkSession) + extends LeafNode { + + override def isStreaming: Boolean = true + override def toString: String = source.toString + + // There's no sensible value here. On the execution path, this relation will be + // swapped out with microbatches. 
But some dataframe operations (in particular explain) do lead + // to this node surviving analysis. So we satisfy the LeafNode contract with the session default + // value. + override def computeStats(): Statistics = Statistics( + sizeInBytes = BigInt(session.sessionState.conf.defaultSizeInBytes) + ) +} + /** * A dummy physical plan for [[StreamingRelation]] to support * [[org.apache.spark.sql.Dataset.explain]] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala new file mode 100644 index 0000000000000..274a7cac35e01 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.continuous + +import java.util.concurrent.{ArrayBlockingQueue, BlockingQueue} +import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong} + +import scala.collection.JavaConverters._ + +import org.apache.spark._ +import org.apache.spark.internal.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.rpc.RpcEndpointRef +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.execution.datasources.v2.{DataSourceRDDPartition, RowToUnsafeDataReader} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous._ +import org.apache.spark.sql.sources.v2.reader._ +import org.apache.spark.sql.streaming.ProcessingTime +import org.apache.spark.util.SystemClock + +class ContinuousDataSourceRDD( + sc: SparkContext, + @transient private val readTasks: java.util.List[ReadTask[UnsafeRow]]) + extends RDD[UnsafeRow](sc, Nil) { + + override protected def getPartitions: Array[Partition] = { + readTasks.asScala.zipWithIndex.map { + case (readTask, index) => new DataSourceRDDPartition(index, readTask) + }.toArray + } + + override def compute(split: Partition, context: TaskContext): Iterator[UnsafeRow] = { + val reader = split.asInstanceOf[DataSourceRDDPartition].readTask.createDataReader() + + // TODO: capacity option + val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](1024) + + val epochPollThread = new EpochPollThread(queue, context) + epochPollThread.setDaemon(true) + epochPollThread.start() + + val dataReaderThread = new DataReaderThread(reader, queue, context) + dataReaderThread.setDaemon(true) + dataReaderThread.start() + + context.addTaskCompletionListener(_ => { + reader.close() + dataReaderThread.interrupt() + epochPollThread.interrupt() + }) + + val epochEndpoint = 
EpochCoordinatorRef.get( + context.getLocalProperty(StreamExecution.QUERY_ID_KEY), SparkEnv.get) + new Iterator[UnsafeRow] { + private var currentRow: UnsafeRow = _ + private var currentOffset: PartitionOffset = _ + + override def hasNext(): Boolean = { + val newTuple = queue.take() + val newOffset = newTuple._2 + currentRow = newTuple._1 + if (currentRow == null) { + epochEndpoint.send(ReportPartitionOffset( + context.partitionId(), + newOffset.asInstanceOf[EpochPackedPartitionOffset].epoch, + currentOffset)) + false + } else { + currentOffset = newOffset + true + } + } + + override def next(): UnsafeRow = { + val r = currentRow + currentRow = null + r + } + } + } + + override def getPreferredLocations(split: Partition): Seq[String] = { + split.asInstanceOf[DataSourceRDDPartition].readTask.preferredLocations() + } +} + +case class EpochPackedPartitionOffset(epoch: Long) extends PartitionOffset + +class EpochPollThread( + queue: BlockingQueue[(UnsafeRow, PartitionOffset)], + context: TaskContext) + extends Thread with Logging { + private val epochEndpoint = EpochCoordinatorRef.get( + context.getLocalProperty(StreamExecution.QUERY_ID_KEY), SparkEnv.get) + private var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong + + override def run(): Unit = { + // TODO parameterize + try { + ProcessingTimeExecutor(ProcessingTime(100), new SystemClock()) + .execute { () => + val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch()) + for (i <- currentEpoch to newEpoch - 1) { + queue.put((null, EpochPackedPartitionOffset(i + 1))) + logDebug(s"Sent marker to start epoch ${i + 1}") + } + currentEpoch = newEpoch + true + } + } catch { + case (_: InterruptedException | _: SparkException) if context.isInterrupted() => + // Continuous shutdown might interrupt us, or it might clean up the endpoint before + // interrupting us. Unfortunately, a missing endpoint just throws a generic SparkException. + // In either case, as long as the context shows interrupted, we can safely clean shutdown. + return + } + } +} + +class DataReaderThread( + reader: DataReader[UnsafeRow], + queue: BlockingQueue[(UnsafeRow, PartitionOffset)], + context: TaskContext) extends Thread { + override def run(): Unit = { + val baseReader = reader match { + case r: ContinuousDataReader[UnsafeRow] => r + case wrapped: RowToUnsafeDataReader => + wrapped.rowReader.asInstanceOf[ContinuousDataReader[Row]] + case _ => + throw new IllegalStateException(s"Unknown continuous reader type ${reader.getClass}") + } + try { + while (!context.isInterrupted && !context.isCompleted()) { + if (!reader.next()) { + throw new IllegalStateException( + "Continuous reader reported no remaining elements! Reader should have blocked waiting.") + } + + queue.put((reader.get(), baseReader.getOffset)) + } + } catch { + case _: InterruptedException if context.isInterrupted() => + // Continuous shutdown always involves an interrupt; shut down quietly. + return + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala new file mode 100644 index 0000000000000..637143d8a5acb --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.continuous + +import java.util.concurrent.TimeUnit + +import scala.collection.mutable.{ArrayBuffer, Map => MutableMap} + +import org.apache.spark.SparkEnv +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, WriteToDataSourceV2} +import org.apache.spark.sql.execution.streaming.{ContinuousExecutionRelation, StreamingRelationV2, _} +import org.apache.spark.sql.sources.v2.{ContinuousReadSupport, ContinuousWriteSupport, DataSourceV2Options} +import org.apache.spark.sql.sources.v2.reader.{ContinuousReader, Offset, PartitionOffset} +import org.apache.spark.sql.sources.v2.writer.ContinuousWriter +import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime, Trigger} +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.{Clock, Utils} + +class ContinuousExecution( + sparkSession: SparkSession, + name: String, + checkpointRoot: String, + analyzedPlan: LogicalPlan, + sink: ContinuousWriteSupport, + trigger: Trigger, + triggerClock: Clock, + outputMode: OutputMode, + extraOptions: Map[String, String], + deleteCheckpointOnStop: Boolean) + extends StreamExecution( + sparkSession, name, checkpointRoot, analyzedPlan, sink, + trigger, triggerClock, outputMode, deleteCheckpointOnStop) { + + @volatile protected var continuousSources: Seq[ContinuousReader] = Seq.empty + override protected def sources: Seq[BaseStreamingSource] = continuousSources + + override lazy val logicalPlan: LogicalPlan = { + assert(queryExecutionThread eq Thread.currentThread, + "logicalPlan must be initialized in StreamExecutionThread " + + s"but the current thread was ${Thread.currentThread}") + var nextSourceId = 0L + val toExecutionRelationMap = MutableMap[StreamingRelationV2, ContinuousExecutionRelation]() + analyzedPlan.transform { + case r @ StreamingRelationV2( + source: ContinuousReadSupport, _, extraReaderOptions, output, _) => + toExecutionRelationMap.getOrElseUpdate(r, { + ContinuousExecutionRelation(source, extraReaderOptions, output)(sparkSession) + }) + case StreamingRelationV2(_, sourceName, _, _, _) => + throw new AnalysisException( + s"Data source $sourceName does not support continuous processing.") + } + } + + private val triggerExecutor = trigger match { + case ContinuousTrigger(t) => ProcessingTimeExecutor(ProcessingTime(t), triggerClock) + case _ => throw new IllegalStateException(s"Unsupported type of trigger: $trigger") + } + + override protected def runActivatedStream(sparkSessionForStream: SparkSession): Unit = { + do 
{ + try { + runFromOffsets(sparkSessionForStream) + } catch { + case _: Throwable if state.get().equals(RECONFIGURING) => + // swallow exception and run again + state.set(ACTIVE) + } + } while (true) + } + + /** + * Populate the start offsets to start the execution at the current offsets stored in the sink + * (i.e. avoid reprocessing data that we have already processed). This function must be called + * before any processing occurs and will populate the following fields: + * - currentBatchId + * - committedOffsets + * - availableOffsets + * The basic structure of this method is as follows: + * + * Identify (from the offset log) the offsets used to run the last batch + * IF last batch exists THEN + * Set the next batch to be executed as the last recovered batch + * Check the commit log to see which batch was committed last + * IF the last batch was committed THEN + * Call getBatch using the last batch start and end offsets + * // ^^^^ above line is needed since some sources assume last batch always re-executes + * Setup for a new batch i.e., start = last batch end, and identify new end + * DONE + * ELSE + * Identify a brand new batch + * DONE + */ + private def getStartOffsets(sparkSessionToRunBatches: SparkSession): OffsetSeq = { + batchCommitLog.getLatest() match { + case Some((latestEpochId, _)) => + currentBatchId = latestEpochId + 1 + val nextOffsets = offsetLog.get(currentBatchId).getOrElse { + throw new IllegalStateException( + s"Batch $latestEpochId was committed without next epoch offsets!") + } + // TODO initialize committed offsets + + logDebug(s"Resuming at epoch $currentBatchId with committed offsets " + + s"$committedOffsets and available offsets $availableOffsets") + nextOffsets + case None => + // We are starting this stream for the first time. Offsets are all None. + logInfo(s"Starting new streaming query.") + currentBatchId = 0 + OffsetSeq.fill(continuousSources.map(_ => null): _*) + } + } + + /** + * Processes any data available between `availableOffsets` and `committedOffsets`. + * @param sparkSessionToRunBatch Isolated [[SparkSession]] to run this batch with. + */ + private def runFromOffsets(sparkSessionToRunBatch: SparkSession): Unit = { + import scala.collection.JavaConverters._ + // A list of attributes that will need to be updated. + val replacements = new ArrayBuffer[(Attribute, Attribute)] + // Translate from continuous relation to the underlying data source. + var nextSourceId = 0 + continuousSources = logicalPlan.collect { + case ContinuousExecutionRelation(dataSource, extraReaderOptions, output) => + val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" + nextSourceId += 1 + + dataSource.createContinuousReader( + java.util.Optional.empty[StructType](), + metadataPath, + new DataSourceV2Options(extraReaderOptions.asJava)) + } + uniqueSources = continuousSources.distinct + + val offsets = getStartOffsets(sparkSessionToRunBatch) + + var insertedSourceId = 0 + val withNewSources = logicalPlan transform { + case ContinuousExecutionRelation(_, _, output) => + val reader = continuousSources(insertedSourceId) + insertedSourceId += 1 + val newOutput = reader.readSchema().toAttributes + + assert(output.size == newOutput.size, + s"Invalid reader: ${Utils.truncatedString(output, ",")} != " + + s"${Utils.truncatedString(newOutput, ",")}") + replacements ++= output.zip(newOutput) + + // TODO multiple sources maybe? 
offsets(0) has to be changed to track source id + reader.setOffset(java.util.Optional.ofNullable(offsets.offsets(0).orNull)) + DataSourceV2Relation(newOutput, reader) + } + + // Rewire the plan to use the new attributes that were returned by the source. + val replacementMap = AttributeMap(replacements) + val triggerLogicalPlan = withNewSources transformAllExpressions { + case a: Attribute if replacementMap.contains(a) => + replacementMap(a).withMetadata(a.metadata) + // TODO properly handle timestamp + case ct: CurrentTimestamp => + CurrentBatchTimestamp(0, ct.dataType) + case cd: CurrentDate => + CurrentBatchTimestamp(0, cd.dataType, cd.timeZoneId) + } + + val writer = sink.createContinuousWriter( + s"$runId", + triggerLogicalPlan.schema, + outputMode, + new DataSourceV2Options(extraOptions.asJava)) + val withSink = WriteToDataSourceV2(writer.get(), triggerLogicalPlan) + + val reader = withSink.collect { + case DataSourceV2Relation(_, r: ContinuousReader) => r + }.head + + reportTimeTaken("queryPlanning") { + lastExecution = new IncrementalExecution( + sparkSessionToRunBatch, + withSink, + outputMode, + checkpointFile("state"), + runId, + currentBatchId, + offsetSeqMetadata) + lastExecution.executedPlan // Force the lazy generation of execution plan + } + + sparkSession.sparkContext.setLocalProperty( + ContinuousExecution.START_EPOCH_KEY, currentBatchId.toString) + + // Use the parent Spark session since it's where this query is registered. + // TODO: we should use runId for the endpoint to be safe against cross-contamination + // from failed runs + val epochEndpoint = + EpochCoordinatorRef.create( + writer.get(), reader, currentBatchId, id.toString, sparkSession, SparkEnv.get) + val epochUpdateThread = new Thread(new Runnable { + override def run: Unit = { + try { + triggerExecutor.execute(() => { + startTrigger() + + if (reader.needsReconfiguration()) { + stopSources() + state.set(RECONFIGURING) + if (queryExecutionThread.isAlive) { + sparkSession.sparkContext.cancelJobGroup(runId.toString) + queryExecutionThread.interrupt() + // No need to join - this thread is about to end anyway. + } + false + } else if (isActive) { + currentBatchId = epochEndpoint.askSync[Long](IncrementAndGetEpoch()) + logInfo(s"New epoch $currentBatchId is starting.") + true + } else { + false + } + }) + } catch { + case _: InterruptedException => + // Cleanly stop the query. + return + } + } + }) + + try { + epochUpdateThread.setDaemon(true) + epochUpdateThread.start() + + reportTimeTaken("runContinuous") { + SQLExecution.withNewExecutionId( + sparkSessionToRunBatch, lastExecution)(lastExecution.toRdd) + } + } finally { + SparkEnv.get.rpcEnv.stop(epochEndpoint) + + epochUpdateThread.interrupt() + epochUpdateThread.join() + } + } + + def addOffset( + epoch: Long, reader: ContinuousReader, partitionOffsets: Seq[PartitionOffset]): Unit = { + assert(continuousSources.length == 1, "only one continuous source supported currently") + + if (partitionOffsets.contains(null)) { + // If any offset is null, that means the corresponding partition hasn't seen any data yet, so + // there's nothing meaningful to add to the offset log. 
+ } + val globalOffset = reader.mergeOffsets(partitionOffsets.toArray) + synchronized { + offsetLog.add(epoch, OffsetSeq.fill(globalOffset)) + } + } + + def commit(epoch: Long): Unit = { + assert(continuousSources.length == 1, "only one continuous source supported currently") + synchronized { + batchCommitLog.add(epoch) + val offset = offsetLog.get(epoch + 1).get.offsets(0).get + committedOffsets ++= Seq(continuousSources(0) -> offset) + } + + awaitProgressLock.lock() + try { + awaitProgressLockCondition.signalAll() + } finally { + awaitProgressLock.unlock() + } + } + + /** + * Blocks the current thread until execution has received offsets for the specified epoch. + */ + /* private[sql] def awaitEpoch(epoch: Long): Unit = { + def notDone = { + val latestCommit = batchCommitLog.getLatest <= epoch + } + + while (notDone) { + awaitProgressLock.lock() + try { + awaitProgressLockCondition.await(100, TimeUnit.MILLISECONDS) + if (streamDeathCause != null) { + throw streamDeathCause + } + } finally { + awaitProgressLock.unlock() + } + } + } */ +} + + +object ContinuousExecution { + val START_EPOCH_KEY = "__continuous_start_epoch" +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala index 4c3a1ee201ac1..a66be6634814d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala @@ -66,6 +66,9 @@ class ContinuousRateStreamReader(options: DataSourceV2Options) override def getStartOffset(): Offset = offset + // Exposed so unit tests can reliably ensure they end after a desired row count. + private[sql] var lastStartTime: Long = _ + override def createReadTasks(): java.util.List[ReadTask[Row]] = { val partitionStartMap = offset match { case off: RateStreamOffset => off.partitionToValueAndRunTimeMs @@ -137,6 +140,7 @@ class RateStreamDataReader( return false } + currentValue += increment currentRow = Row( DateTimeUtils.toJavaTimestamp(DateTimeUtils.fromMillis(nextReadTime)), currentValue) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTrigger.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTrigger.scala new file mode 100644 index 0000000000000..90e1766c4d9f1 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTrigger.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.streaming.continuous + +import java.util.concurrent.TimeUnit + +import scala.concurrent.duration.Duration + +import org.apache.commons.lang3.StringUtils + +import org.apache.spark.annotation.{Experimental, InterfaceStability} +import org.apache.spark.sql.streaming.{ProcessingTime, Trigger} +import org.apache.spark.unsafe.types.CalendarInterval + +/** + * A [[Trigger]] that continuously processes streaming data, asynchronously checkpointing at + * the specified interval. + */ +@InterfaceStability.Evolving +case class ContinuousTrigger(intervalMs: Long) extends Trigger { + require(intervalMs >= 0, "the interval of trigger should not be negative") +} + +private[sql] object ContinuousTrigger { + def apply(interval: String): ContinuousTrigger = { + if (StringUtils.isBlank(interval)) { + throw new IllegalArgumentException( + "interval cannot be null or blank.") + } + val cal = if (interval.startsWith("interval")) { + CalendarInterval.fromString(interval) + } else { + CalendarInterval.fromString("interval " + interval) + } + if (cal == null) { + throw new IllegalArgumentException(s"Invalid interval: $interval") + } + if (cal.months > 0) { + throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval") + } + new ContinuousTrigger(cal.microseconds / 1000) + } + + def apply(interval: Duration): ContinuousTrigger = { + ContinuousTrigger(interval.toMillis) + } + + def create(interval: String): ContinuousTrigger = { + apply(interval) + } + + def create(interval: Long, unit: TimeUnit): ContinuousTrigger = { + ContinuousTrigger(unit.toMillis(interval)) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala new file mode 100644 index 0000000000000..eb3f7779cba96 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.streaming.continuous + +import java.util.concurrent.atomic.AtomicLong + +import scala.collection.mutable + +import org.apache.spark.SparkEnv +import org.apache.spark.internal.Logging +import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.streaming.StreamingQueryWrapper +import org.apache.spark.sql.sources.v2.reader.{ContinuousReader, PartitionOffset} +import org.apache.spark.sql.sources.v2.writer.{ContinuousWriter, WriterCommitMessage} +import org.apache.spark.util.RpcUtils + +case class CommitPartitionEpoch( + partitionId: Int, + epoch: Long, + message: WriterCommitMessage) + +case class GetCurrentEpoch() + +case class ReportPartitionOffset( + partitionId: Int, + epoch: Long, + offset: PartitionOffset) + +case class SetReaderPartitions(numPartitions: Int) +case class SetWriterPartitions(numPartitions: Int) + +// Should be used only by ContinuousExecution during epoch advancement. +case class IncrementAndGetEpoch() + + +/** Helper object used to create reference to [[EpochCoordinator]]. */ +object EpochCoordinatorRef extends Logging { + + private val endpointNamePrefix = "EpochCoordinator-" + + private def endpointName(queryId: String) = s"EpochCoordinator-$queryId" + + /** + * Create a reference to a new [[EpochCoordinator]]. + */ + def create( + writer: ContinuousWriter, + reader: ContinuousReader, + startEpoch: Long, + queryId: String, + session: SparkSession, + env: SparkEnv): RpcEndpointRef = synchronized { + val coordinator = new EpochCoordinator(writer, reader, startEpoch, queryId, session, env.rpcEnv) + val ref = env.rpcEnv.setupEndpoint(endpointName(queryId), coordinator) + logInfo("Registered EpochCoordinator endpoint") + ref + } + + def get(queryId: String, env: SparkEnv): RpcEndpointRef = synchronized { + val rpcEndpointRef = RpcUtils.makeDriverRef(endpointName(queryId), env.conf, env.rpcEnv) + logDebug("Retrieved existing EpochCoordinator endpoint") + rpcEndpointRef + } +} + +class EpochCoordinator( + writer: ContinuousWriter, + reader: ContinuousReader, + startEpoch: Long, + queryId: String, + session: SparkSession, + override val rpcEnv: RpcEnv) + extends ThreadSafeRpcEndpoint with Logging { + + private var numReaderPartitions: Int = _ + private var numWriterPartitions: Int = _ + + // Should only be mutated by this coordinator's subthread. + private var currentDriverEpoch = startEpoch + + // (epoch, partition) -> message + // This is small enough that we don't worry too much about optimizing the shape of the structure. + private val partitionCommits = + mutable.Map[(Long, Int), WriterCommitMessage]() + + private val partitionOffsets = + mutable.Map[(Long, Int), PartitionOffset]() + + private def resolveCommitsAtEpoch(epoch: Long) = { + val thisEpochCommits = + partitionCommits.collect { case ((e, _), msg) if e == epoch => msg } + val nextEpochOffsets = + partitionOffsets.collect { case ((e, _), o) if e == epoch + 1 => o } + + if (thisEpochCommits.size == numWriterPartitions && + nextEpochOffsets.size == numReaderPartitions) { + logDebug(s"Epoch $epoch has received commits from all partitions. 
Committing globally.") + val query = session.streams.get(queryId).asInstanceOf[StreamingQueryWrapper] + .streamingQuery.asInstanceOf[ContinuousExecution] + // Sequencing is important - writer commits to epoch are required to be replayable + writer.commit(epoch, thisEpochCommits.toArray) + query.commit(epoch) + // TODO: cleanup unnecessary state + } + } + + override def receive: PartialFunction[Any, Unit] = { + case CommitPartitionEpoch(partitionId, epoch, message) => + logDebug(s"Got commit from partition $partitionId at epoch $epoch: $message") + if (!partitionCommits.isDefinedAt((epoch, partitionId))) { + partitionCommits.put((epoch, partitionId), message) + resolveCommitsAtEpoch(epoch) + } + + case ReportPartitionOffset(partitionId, epoch, offset) if offset != null => + val query = session.streams.get(queryId).asInstanceOf[StreamingQueryWrapper] + .streamingQuery.asInstanceOf[ContinuousExecution] + partitionOffsets.put((epoch, partitionId), offset) + val thisEpochOffsets = + partitionOffsets.collect { case ((e, _), o) if e == epoch => o } + if (thisEpochOffsets.size == numReaderPartitions) { + logDebug(s"Epoch $epoch has offsets reported from all partitions: $thisEpochOffsets") + query.addOffset(epoch, reader, thisEpochOffsets.toSeq) + resolveCommitsAtEpoch(epoch - 1) + } + + // We can get null offsets reported if the epoch advances before the executor + // has read any data. Ignore those, since they don't affect where we'd want to restart. + case ReportPartitionOffset(_, _, offset) if offset == null => + } + + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { + case GetCurrentEpoch() => + val result = currentDriverEpoch + logDebug(s"Epoch $result") + context.reply(result) + + case IncrementAndGetEpoch() => + currentDriverEpoch += 1 + context.reply(currentDriverEpoch) + + case SetReaderPartitions(numPartitions) => + numReaderPartitions = numPartitions + context.reply(()) + + case SetWriterPartitions(numPartitions) => + numWriterPartitions = numPartitions + context.reply(()) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memoryV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memoryV2.scala index 94c5dd63089b1..972248d5e4df8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memoryV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memoryV2.scala @@ -25,6 +25,8 @@ import scala.util.control.NonFatal import org.apache.spark.internal.Logging import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.{Append, Complete, Update} import org.apache.spark.sql.execution.streaming.Sink import org.apache.spark.sql.sources.v2.{ContinuousWriteSupport, DataSourceV2, DataSourceV2Options, MicroBatchWriteSupport} @@ -177,3 +179,14 @@ class MemoryDataWriter(partition: Int, outputMode: OutputMode) override def abort(): Unit = {} } + + +/** + * Used to query the data that has been written into a [[MemorySink]]. 
+ */ +case class MemoryPlanV2(sink: MemorySinkV2, override val output: Seq[Attribute]) extends LeafNode { + private val sizePerRow = output.map(_.dataType.defaultSize).sum + + override def computeStats(): Statistics = Statistics(sizePerRow * sink.allData.size) +} + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 41aa02c2b5e35..4ec422b0f9745 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -26,7 +26,8 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource -import org.apache.spark.sql.execution.streaming.StreamingRelation +import org.apache.spark.sql.execution.streaming.{StreamingRelation, StreamingRelationV2} +import org.apache.spark.sql.sources.v2.{ContinuousReadSupport, DataSourceV2Options, MicroBatchReadSupport} import org.apache.spark.sql.types.StructType /** @@ -153,13 +154,35 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo "read files of Hive data source directly.") } - val dataSource = - DataSource( - sparkSession, - userSpecifiedSchema = userSpecifiedSchema, - className = source, - options = extraOptions.toMap) - Dataset.ofRows(sparkSession, StreamingRelation(dataSource)) + val ds = DataSource.lookupDataSource(source, sparkSession.sqlContext.conf).newInstance() + val options = new DataSourceV2Options(extraOptions.asJava) + // We need to generate the V1 data source so we can pass it to the V2 relation as a shim. + // We can't be sure at this point whether we'll actually want to use V2, since we don't know the + // writer or whether the query is continuous. + val v1DataSource = DataSource( + sparkSession, + userSpecifiedSchema = userSpecifiedSchema, + className = source, + options = extraOptions.toMap) + ds match { + case s: ContinuousReadSupport => + // TODO: What do we pass as the metadata log path? We just need some scratch space, the + // schema can't depend on it + val tempReader = s.createContinuousReader( + java.util.Optional.ofNullable(userSpecifiedSchema.orNull), + "scratch/path/for/schema", + options) + // Generate the V1 node to catch errors thrown within generation. + StreamingRelation(v1DataSource) + Dataset.ofRows( + sparkSession, + StreamingRelationV2( + s, source, extraOptions.toMap, + tempReader.readSchema().toAttributes, v1DataSource)) + case _ => + // Code path for data source v1. 
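
The branch above is a capability check: if the loaded source class implements ContinuousReadSupport, a V2 relation is planned (keeping the V1 DataSource as a shim), otherwise the existing V1 path runs. A stripped-down sketch of the same dispatch shape in plain Scala (hypothetical trait and names, not the Spark interfaces):

    // Hypothetical capability marker standing in for ContinuousReadSupport.
    trait ContinuousCapable { def readSchema(): Seq[String] }

    final class RateLikeSource extends ContinuousCapable {
      override def readSchema(): Seq[String] = Seq("timestamp", "value")
    }
    final class LegacySource // no capability trait

    def planRelation(source: AnyRef): String = source match {
      case s: ContinuousCapable => s"v2 relation, schema = ${s.readSchema().mkString(", ")}"
      case _                    => "v1 StreamingRelation" // code path for data source v1
    }
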
+ Dataset.ofRows(sparkSession, StreamingRelation(v1DataSource)) + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 0be69b98abc8a..573ca9a85ccaa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -26,7 +26,8 @@ import org.apache.spark.sql.{AnalysisException, Dataset, ForeachWriter} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource -import org.apache.spark.sql.execution.streaming.{ForeachSink, MemoryPlan, MemorySink} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger /** * Interface used to write a streaming `Dataset` to external storage systems (e.g. file systems, @@ -240,14 +241,23 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { if (extraOptions.get("queryName").isEmpty) { throw new AnalysisException("queryName must be specified for memory sink") } - val sink = new MemorySink(df.schema, outputMode) - val resultDf = Dataset.ofRows(df.sparkSession, new MemoryPlan(sink)) + val (sink, resultDf) = trigger match { + case _: ContinuousTrigger => + val s = new MemorySinkV2() + val r = Dataset.ofRows(df.sparkSession, new MemoryPlanV2(s, df.schema.toAttributes)) + (s, r) + case _ => + val s = new MemorySink(df.schema, outputMode) + val r = Dataset.ofRows(df.sparkSession, new MemoryPlan(s)) + (s, r) + } val chkpointLoc = extraOptions.get("checkpointLocation") val recoverFromChkpoint = outputMode == OutputMode.Complete() val query = df.sparkSession.sessionState.streamingQueryManager.startQuery( extraOptions.get("queryName"), chkpointLoc, df, + extraOptions.toMap, sink, outputMode, useTempCheckpointLocation = true, @@ -262,6 +272,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { extraOptions.get("queryName"), extraOptions.get("checkpointLocation"), df, + extraOptions.toMap, sink, outputMode, useTempCheckpointLocation = true, @@ -277,6 +288,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { extraOptions.get("queryName"), extraOptions.get("checkpointLocation"), df, + extraOptions.toMap, dataSource.createSink(outputMode), outputMode, useTempCheckpointLocation = source == "console", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index 555d6e23f9385..dda19b8ded08b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -29,8 +29,10 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution import org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorRef import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.v2.ContinuousWriteSupport import org.apache.spark.util.{Clock, SystemClock, Utils} /** @@ -188,7 +190,8 @@ class 
StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo userSpecifiedName: Option[String], userSpecifiedCheckpointLocation: Option[String], df: DataFrame, - sink: Sink, + extraOptions: Map[String, String], + sink: BaseStreamingSink, outputMode: OutputMode, useTempCheckpointLocation: Boolean, recoverFromCheckpointLocation: Boolean, @@ -237,16 +240,31 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo "is not supported in streaming DataFrames/Datasets and will be disabled.") } - new StreamingQueryWrapper(new MicroBatchExecution( - sparkSession, - userSpecifiedName.orNull, - checkpointLocation, - analyzedPlan, - sink, - trigger, - triggerClock, - outputMode, - deleteCheckpointOnStop)) + sink match { + case v1Sink: Sink => + new StreamingQueryWrapper(new MicroBatchExecution( + sparkSession, + userSpecifiedName.orNull, + checkpointLocation, + analyzedPlan, + v1Sink, + trigger, + triggerClock, + outputMode, + deleteCheckpointOnStop)) + case v2Sink: ContinuousWriteSupport => + new StreamingQueryWrapper(new ContinuousExecution( + sparkSession, + userSpecifiedName.orNull, + checkpointLocation, + analyzedPlan, + v2Sink, + trigger, + triggerClock, + outputMode, + extraOptions, + deleteCheckpointOnStop)) + } } /** @@ -269,7 +287,8 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo userSpecifiedName: Option[String], userSpecifiedCheckpointLocation: Option[String], df: DataFrame, - sink: Sink, + extraOptions: Map[String, String], + sink: BaseStreamingSink, outputMode: OutputMode, useTempCheckpointLocation: Boolean = false, recoverFromCheckpointLocation: Boolean = true, @@ -279,6 +298,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo userSpecifiedName, userSpecifiedCheckpointLocation, df, + extraOptions, sink, outputMode, useTempCheckpointLocation, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 755490308b5b9..0db2022dc8d16 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -77,7 +77,7 @@ class StreamSuite extends StreamTest { } test("StreamingRelation.computeStats") { - val streamingRelation = spark.readStream.format("rate").load().logicalPlan collect { + val streamingRelation = spark.readStream.format("memory").load().logicalPlan collect { case s: StreamingRelation => s } assert(streamingRelation.nonEmpty, "cannot find StreamingRelation") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 71a474ef63e84..577afbf3cd123 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -246,7 +246,8 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be */ def testStream( _stream: Dataset[_], - outputMode: OutputMode = OutputMode.Append)(actions: StreamAction*): Unit = synchronized { + outputMode: OutputMode = OutputMode.Append, + useV2Sink: Boolean = false)(actions: StreamAction*): Unit = synchronized { import org.apache.spark.sql.streaming.util.StreamManualClock // `synchronized` is added to prevent the user from calling multiple `testStream`s concurrently @@ -259,7 +260,7 @@ trait StreamTest extends QueryTest with 
SharedSQLContext with TimeLimits with Be var currentStream: StreamExecution = null var lastStream: StreamExecution = null val awaiting = new mutable.HashMap[Int, Offset]() // source index -> offset to wait for - val sink = new MemorySink(stream.schema, outputMode) + val sink = if (useV2Sink) new MemorySinkV2 else new MemorySink(stream.schema, outputMode) val resetConfValues = mutable.Map[String, Option[String]]() @volatile @@ -308,7 +309,11 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be "" } - def testState = + def testState = { + val sinkDebugString = sink match { + case s: MemorySink => s.toDebugString + case s: MemorySinkV2 => s.toDebugString + } s""" |== Progress == |$testActions @@ -321,12 +326,13 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be |${if (streamThreadDeathCause != null) stackTraceToString(streamThreadDeathCause) else ""} | |== Sink == - |${sink.toDebugString} + |$sinkDebugString | | |== Plan == |${if (currentStream != null) currentStream.lastExecution else ""} """.stripMargin + } def verify(condition: => Boolean, message: String): Unit = { if (!condition) { @@ -383,7 +389,11 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be } } - try if (lastOnly) sink.latestBatchData else sink.allData catch { + val (latestBatchData, allData) = sink match { + case s: MemorySink => (s.latestBatchData, s.allData) + case s: MemorySinkV2 => (s.latestBatchData, s.allData) + } + val sparkAnswer = try if (lastOnly) latestBatchData else allData catch { case e: Exception => failTest("Exception while getting data from sink", e) } @@ -423,6 +433,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be None, Some(metadataRoot), stream, + Map(), sink, outputMode, trigger = trigger, @@ -465,9 +476,10 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be s"microbatch thread not stopped") verify(!currentStream.isActive, "query.isActive() is false even after stopping") - verify(currentStream.exception.isEmpty, + // TODO these shouldn't be reported in the first place + /* verify(currentStream.exception.isEmpty, s"query.exception() is not empty after clean stop: " + - currentStream.exception.map(_.toString()).getOrElse("")) + currentStream.exception.map(_.toString()).getOrElse("")) */ } catch { case _: InterruptedException => case e: org.scalatest.exceptions.TestFailedDueToTimeoutException => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala new file mode 100644 index 0000000000000..e5f24beaf8782 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming.continuous + +import java.io.{File, InterruptedIOException, IOException, UncheckedIOException} +import java.nio.channels.ClosedByInterruptException +import java.util.concurrent.{CountDownLatch, ExecutionException, TimeoutException, TimeUnit} + +import scala.reflect.ClassTag +import scala.util.control.ControlThrowable + +import com.google.common.util.concurrent.UncheckedExecutionException +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.{SparkContext, SparkEnv} +import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.plans.logical.Range +import org.apache.spark.sql.catalyst.streaming.InternalOutputModes +import org.apache.spark.sql.execution.command.ExplainCommand +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanExec, WriteToDataSourceV2Exec} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous._ +import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreConf, StateStoreId, StateStoreProvider} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.StreamSourceProvider +import org.apache.spark.sql.streaming.{StreamTest, Trigger} +import org.apache.spark.sql.streaming.util.StreamManualClock +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + +class ContinuousSuite extends StreamTest { + import testImplicits._ + + private def waitForRateSourceTriggers(query: StreamExecution, numTriggers: Int): Unit = { + query match { + case s: ContinuousExecution => + s.awaitInitialization(streamingTimeout.toMillis) + Thread.sleep(5000) + val reader = s.lastExecution.executedPlan.collectFirst { + case DataSourceV2ScanExec(_, r: ContinuousRateStreamReader) => r + }.get + + while (System.currentTimeMillis < reader.lastStartTime + 9100) { + Thread.sleep(reader.lastStartTime + 9100 - System.currentTimeMillis) + } + } + } + + private def incrementEpoch(query: StreamExecution): Unit = { + query match { + case s: ContinuousExecution => + EpochCoordinatorRef.get(s.id.toString, SparkEnv.get) + .askSync[Long](IncrementAndGetEpoch()) + Thread.sleep(200) + } + } + + test("basic rate source") { + val df = spark.readStream.format("rate").load().select('value) + + // TODO: validate against low trigger interval + testStream(df, useV2Sink = true)( + StartStream(Trigger.Continuous(1000000)), + Execute(waitForRateSourceTriggers(_, 10)), + Execute(incrementEpoch), + CheckAnswer(scala.Range(0, 50): _*), + StopStream, + StartStream(Trigger.Continuous(1000000)), + Execute(waitForRateSourceTriggers(_, 10)), + Execute(incrementEpoch), + CheckAnswer(scala.Range(0, 100): _*)) + } + + /* test("repeatedly restart") { + val df = spark.readStream.format("rate").option("continuous", "true").load().select('value) + + // TODO: validate against low trigger interval + testStream(df)( + StartStream(Trigger.Continuous("1 second")), + Execute(_ => Thread.sleep(3000)), + CheckAnswer(scala.Range(0, 10): _*), + StopStream, + StartStream(Trigger.Continuous("1 second")), + StopStream, + StartStream(Trigger.Continuous("1 second")), + StopStream, + StartStream(Trigger.Continuous("1 second")), + Execute(_ => Thread.sleep(3000)), + CheckAnswer(scala.Range(0, 20): _*)) + } 
*/ + + test("query without test harness") { + val df = spark.readStream.format("rate").load().select('value) + val query = df.writeStream + .format("memory") + .queryName("noharness") + .trigger(Trigger.Continuous(1000)) + .start() + waitForRateSourceTriggers(query.asInstanceOf[StreamingQueryWrapper].streamingQuery, 0) + query.stop() + + val results = spark.read.table("noharness").collect() + assert(!results.isEmpty) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index 4286e8a6ca2c8..85c1a18c4af51 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.internal.{SessionState, SessionStateBuilder, SQLConf */ private[spark] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) { self => def this(sparkConf: SparkConf) { - this(new SparkContext("local[2]", "test-sql-context", + this(new SparkContext("local[10]", "test-sql-context", sparkConf.set("spark.sql.testkey", "true"))) } From bd20abda62b0f8bb0f71c07516d4ed7d65dc21b3 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 13:18:25 -0800 Subject: [PATCH 07/50] awaitEpoch impl --- .../ContinuousDataSourceRDDIter.scala | 61 ++++++++++++------- .../continuous/ContinuousExecution.scala | 12 ++-- .../continuous/EpochCoordinator.scala | 6 +- .../spark/sql/streaming/StreamTest.scala | 21 +++++++ .../continuous/ContinuousSuite.scala | 14 +++-- 5 files changed, 78 insertions(+), 36 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 274a7cac35e01..98bbce6e7c6ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -35,6 +35,12 @@ import org.apache.spark.sql.sources.v2.reader._ import org.apache.spark.sql.streaming.ProcessingTime import org.apache.spark.util.SystemClock +// There are two types of entry possible: +// (row, offset, null) - a row with offset +// (null, null, epoch) - an epoch marker +// We force both into the same queue to avoid having to synchronize across multiple queues. 
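
To illustrate the comment above (and the ReadQueueEntry shape declared just below): one blocking queue carries both data and epoch markers, so the consuming iterator observes them in a single total order and no cross-queue coordination is needed. A self-contained plain-Scala sketch, not the RDD code itself:

    import java.util.concurrent.ArrayBlockingQueue

    object SingleQueueSketch {
      sealed trait Entry
      final case class DataEntry(row: String, offset: Long) extends Entry
      final case class EpochMarker(epoch: Long) extends Entry

      private val queue = new ArrayBlockingQueue[Entry](1024)

      // Producer side: the data-reader thread offers rows, the epoch-poll thread offers markers.
      def offerRow(row: String, offset: Long): Unit = queue.put(DataEntry(row, offset))
      def offerEpochMarker(epoch: Long): Unit = queue.put(EpochMarker(epoch))

      // Consumer side: a single take() loop sees rows and epoch boundaries in one total order.
      def drainOnce(): Unit = queue.take() match {
        case DataEntry(row, offset) => println(s"row $row at offset $offset")
        case EpochMarker(epoch)     => println(s"epoch $epoch finished for this partition")
      }
    }
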
+case class ReadQueueEntry(row: UnsafeRow, offset: PartitionOffset, epoch: Long) + class ContinuousDataSourceRDD( sc: SparkContext, @transient private val readTasks: java.util.List[ReadTask[UnsafeRow]]) @@ -52,25 +58,12 @@ class ContinuousDataSourceRDD( // TODO: capacity option val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](1024) - val epochPollThread = new EpochPollThread(queue, context) - epochPollThread.setDaemon(true) - epochPollThread.start() - - val dataReaderThread = new DataReaderThread(reader, queue, context) - dataReaderThread.setDaemon(true) - dataReaderThread.start() - - context.addTaskCompletionListener(_ => { - reader.close() - dataReaderThread.interrupt() - epochPollThread.interrupt() - }) - val epochEndpoint = EpochCoordinatorRef.get( context.getLocalProperty(StreamExecution.QUERY_ID_KEY), SparkEnv.get) - new Iterator[UnsafeRow] { + val itr = new Iterator[UnsafeRow] { private var currentRow: UnsafeRow = _ - private var currentOffset: PartitionOffset = _ + private var currentOffset: PartitionOffset = + ContinuousDataSourceRDD.getBaseReader(reader).getOffset override def hasNext(): Boolean = { val newTuple = queue.take() @@ -94,6 +87,22 @@ class ContinuousDataSourceRDD( r } } + + + val epochPollThread = new EpochPollThread(queue, context) + epochPollThread.setDaemon(true) + epochPollThread.start() + + val dataReaderThread = new DataReaderThread(reader, queue, context) + dataReaderThread.setDaemon(true) + dataReaderThread.start() + + context.addTaskCompletionListener(_ => { + reader.close() + dataReaderThread.interrupt() + epochPollThread.interrupt() + }) + itr } override def getPreferredLocations(split: Partition): Seq[String] = { @@ -139,13 +148,7 @@ class DataReaderThread( queue: BlockingQueue[(UnsafeRow, PartitionOffset)], context: TaskContext) extends Thread { override def run(): Unit = { - val baseReader = reader match { - case r: ContinuousDataReader[UnsafeRow] => r - case wrapped: RowToUnsafeDataReader => - wrapped.rowReader.asInstanceOf[ContinuousDataReader[Row]] - case _ => - throw new IllegalStateException(s"Unknown continuous reader type ${reader.getClass}") - } + val baseReader = ContinuousDataSourceRDD.getBaseReader(reader) try { while (!context.isInterrupted && !context.isCompleted()) { if (!reader.next()) { @@ -162,3 +165,15 @@ class DataReaderThread( } } } + +object ContinuousDataSourceRDD { + private[continuous] def getBaseReader(reader: DataReader[UnsafeRow]): ContinuousDataReader[_] = { + reader match { + case r: ContinuousDataReader[UnsafeRow] => r + case wrapped: RowToUnsafeDataReader => + wrapped.rowReader.asInstanceOf[ContinuousDataReader[Row]] + case _ => + throw new IllegalStateException(s"Unknown continuous reader type ${reader.getClass}") + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 637143d8a5acb..0955ff7555ab3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -294,11 +294,15 @@ class ContinuousExecution( } /** - * Blocks the current thread until execution has received offsets for the specified epoch. + * Blocks the current thread until execution has committed past the specified epoch. 
*/ - /* private[sql] def awaitEpoch(epoch: Long): Unit = { + private[sql] def awaitEpoch(epoch: Long): Unit = { def notDone = { - val latestCommit = batchCommitLog.getLatest <= epoch + val latestCommit = batchCommitLog.getLatest() + latestCommit match { + case Some((latestEpoch, _)) => latestEpoch < epoch + case None => true + } } while (notDone) { @@ -312,7 +316,7 @@ class ContinuousExecution( awaitProgressLock.unlock() } } - } */ + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index eb3f7779cba96..0bea6030e9ac0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -128,7 +128,7 @@ class EpochCoordinator( resolveCommitsAtEpoch(epoch) } - case ReportPartitionOffset(partitionId, epoch, offset) if offset != null => + case ReportPartitionOffset(partitionId, epoch, offset) => val query = session.streams.get(queryId).asInstanceOf[StreamingQueryWrapper] .streamingQuery.asInstanceOf[ContinuousExecution] partitionOffsets.put((epoch, partitionId), offset) @@ -139,10 +139,6 @@ class EpochCoordinator( query.addOffset(epoch, reader, thisEpochOffsets.toSeq) resolveCommitsAtEpoch(epoch - 1) } - - // We can get null offsets reported if the epoch advances before the executor - // has read any data. Ignore those, since they don't affect where we'd want to restart. - case ReportPartitionOffset(_, _, offset) if offset == null => } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 577afbf3cd123..bb5a20dc14c98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -33,11 +33,13 @@ import org.scalatest.exceptions.TestFailedDueToTimeoutException import org.scalatest.time.Span import org.scalatest.time.SpanSugar._ +import org.apache.spark.SparkEnv import org.apache.spark.sql.{Dataset, Encoder, QueryTest, Row} import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous.{ContinuousExecution, EpochCoordinatorRef, IncrementAndGetEpoch} import org.apache.spark.sql.execution.streaming.state.StateStore import org.apache.spark.sql.streaming.StreamingQueryListener._ import org.apache.spark.sql.test.SharedSQLContext @@ -237,6 +239,25 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be AssertOnQuery(query => { func(query); true }) } + object AwaitEpoch { + def apply(epoch: Long): AssertOnQuery = + Execute { + case s: ContinuousExecution => s.awaitEpoch(epoch) + case _ => throw new IllegalStateException("microbatch cannot await epoch") + } + } + + object IncrementEpoch { + def apply(): AssertOnQuery = + Execute { + case s: ContinuousExecution => + val newEpoch = EpochCoordinatorRef.get(s.id.toString, SparkEnv.get) + .askSync[Long](IncrementAndGetEpoch()) + s.awaitEpoch(newEpoch - 1) + case _ => throw new 
IllegalStateException("microbatch cannot increment epoch") + } + } + /** * Executes the specified actions on the given streaming DataFrame and provides helpful * error messages in the case of failures or incorrect answers. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index e5f24beaf8782..5b9ffa77670f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -49,11 +49,15 @@ import org.apache.spark.util.Utils class ContinuousSuite extends StreamTest { import testImplicits._ + private def awaitEpoch(query: StreamExecution, epoch: Long): Unit = { + query match { + case s: ContinuousExecution => s.awaitEpoch(epoch) + } + } + private def waitForRateSourceTriggers(query: StreamExecution, numTriggers: Int): Unit = { query match { case s: ContinuousExecution => - s.awaitInitialization(streamingTimeout.toMillis) - Thread.sleep(5000) val reader = s.lastExecution.executedPlan.collectFirst { case DataSourceV2ScanExec(_, r: ContinuousRateStreamReader) => r }.get @@ -79,13 +83,15 @@ class ContinuousSuite extends StreamTest { // TODO: validate against low trigger interval testStream(df, useV2Sink = true)( StartStream(Trigger.Continuous(1000000)), + AwaitEpoch(0), Execute(waitForRateSourceTriggers(_, 10)), - Execute(incrementEpoch), + IncrementEpoch(), CheckAnswer(scala.Range(0, 50): _*), StopStream, StartStream(Trigger.Continuous(1000000)), + AwaitEpoch(2), Execute(waitForRateSourceTriggers(_, 10)), - Execute(incrementEpoch), + IncrementEpoch(), CheckAnswer(scala.Range(0, 100): _*)) } From b930c2aad75de8b54aafd89635f006dd524d2d31 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 13:46:09 -0800 Subject: [PATCH 08/50] move local[10] to only continuous suite --- .../continuous/ContinuousSuite.scala | 62 +++++++++++-------- .../spark/sql/test/TestSQLContext.scala | 2 +- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 5b9ffa77670f6..ea7f000c9d404 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -43,56 +43,61 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.StreamSourceProvider import org.apache.spark.sql.streaming.{StreamTest, Trigger} import org.apache.spark.sql.streaming.util.StreamManualClock +import org.apache.spark.sql.test.TestSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils class ContinuousSuite extends StreamTest { import testImplicits._ - private def awaitEpoch(query: StreamExecution, epoch: Long): Unit = { - query match { - case s: ContinuousExecution => s.awaitEpoch(epoch) - } - } + // We need more than the default local[2] to be able to schedule all partitions simultaneously. 
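
Outside the test harness, the ingredients these tests exercise (rate source, memory sink, a continuous trigger, and a master with enough cores for every long-running task) would be wired up roughly as follows. This is a sketch using public APIs plus the Trigger.Continuous helper this series introduces, not code from the patch; names like "continuous_sketch" are illustrative:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.streaming.Trigger

    object ContinuousQuerySketch {
      def main(args: Array[String]): Unit = {
        // Continuous tasks never finish, so give the scheduler at least one core per partition.
        val spark = SparkSession.builder()
          .master("local[10]")
          .appName("continuous-sketch")
          .getOrCreate()

        val query = spark.readStream
          .format("rate")
          .option("numPartitions", "5")
          .option("rowsPerSecond", "5")
          .load()
          .select("value")
          .writeStream
          .format("memory")
          .queryName("continuous_sketch")
          .trigger(Trigger.Continuous("1 second"))
          .start()

        query.awaitTermination(30000) // let it run briefly, then stop
        query.stop()
        spark.read.table("continuous_sketch").show()
        spark.stop()
      }
    }
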
+ override protected def createSparkSession = new TestSparkSession( + new SparkContext( + "local[10]", + "continuous-stream-test-sql-context", + sparkConf.set("spark.sql.testkey", "true"))) private def waitForRateSourceTriggers(query: StreamExecution, numTriggers: Int): Unit = { query match { case s: ContinuousExecution => + assert(numTriggers >= 2, "must wait for at least 2 triggers to ensure query is initialized") val reader = s.lastExecution.executedPlan.collectFirst { case DataSourceV2ScanExec(_, r: ContinuousRateStreamReader) => r }.get - while (System.currentTimeMillis < reader.lastStartTime + 9100) { - Thread.sleep(reader.lastStartTime + 9100 - System.currentTimeMillis) + val deltaMs = (numTriggers - 1) * 1000 + 300 + + while (System.currentTimeMillis < reader.lastStartTime + deltaMs) { + Thread.sleep(reader.lastStartTime + deltaMs - System.currentTimeMillis) } } } - private def incrementEpoch(query: StreamExecution): Unit = { - query match { - case s: ContinuousExecution => - EpochCoordinatorRef.get(s.id.toString, SparkEnv.get) - .askSync[Long](IncrementAndGetEpoch()) - Thread.sleep(200) - } - } + // A continuous trigger that will only fire the initial time for the duration of a test. + // This allows clean testing with manual epoch advancement. + private val longContinuousTrigger = Trigger.Continuous("1 hour") test("basic rate source") { - val df = spark.readStream.format("rate").load().select('value) + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) // TODO: validate against low trigger interval testStream(df, useV2Sink = true)( - StartStream(Trigger.Continuous(1000000)), + StartStream(longContinuousTrigger), AwaitEpoch(0), - Execute(waitForRateSourceTriggers(_, 10)), + Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 50): _*), + CheckAnswer(scala.Range(0, 10): _*), StopStream, - StartStream(Trigger.Continuous(1000000)), + StartStream(longContinuousTrigger), AwaitEpoch(2), - Execute(waitForRateSourceTriggers(_, 10)), + Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 100): _*)) + CheckAnswer(scala.Range(0, 20): _*)) } /* test("repeatedly restart") { @@ -114,16 +119,21 @@ class ContinuousSuite extends StreamTest { } */ test("query without test harness") { - val df = spark.readStream.format("rate").load().select('value) + val df = spark.readStream + .format("rate") + .option("numPartitions", "2") + .option("rowsPerSecond", "2") + .load() + .select('value) val query = df.writeStream .format("memory") .queryName("noharness") - .trigger(Trigger.Continuous(1000)) + .trigger(Trigger.Continuous(100)) .start() - waitForRateSourceTriggers(query.asInstanceOf[StreamingQueryWrapper].streamingQuery, 0) + waitForRateSourceTriggers(query.asInstanceOf[StreamingQueryWrapper].streamingQuery, 2) query.stop() val results = spark.read.table("noharness").collect() - assert(!results.isEmpty) + assert(results.toSet == Set(0, 1, 2, 3).map(Row(_))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index 85c1a18c4af51..4286e8a6ca2c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.internal.{SessionState, SessionStateBuilder, SQLConf */ private[spark] class TestSparkSession(sc: 
SparkContext) extends SparkSession(sc) { self => def this(sparkConf: SparkConf) { - this(new SparkContext("local[10]", "test-sql-context", + this(new SparkContext("local[2]", "test-sql-context", sparkConf.set("spark.sql.testkey", "true"))) } From 5c2d1b2c20333e29a01cd8a9203ccf0e38261942 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 13:49:55 -0800 Subject: [PATCH 09/50] repeatedly restart --- .../continuous/ContinuousSuite.scala | 41 +++++++++++-------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index ea7f000c9d404..122bc2213cdfa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -79,13 +79,12 @@ class ContinuousSuite extends StreamTest { test("basic rate source") { val df = spark.readStream - .format("rate") - .option("numPartitions", "5") - .option("rowsPerSecond", "5") - .load() - .select('value) + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) - // TODO: validate against low trigger interval testStream(df, useV2Sink = true)( StartStream(longContinuousTrigger), AwaitEpoch(0), @@ -100,23 +99,31 @@ class ContinuousSuite extends StreamTest { CheckAnswer(scala.Range(0, 20): _*)) } - /* test("repeatedly restart") { - val df = spark.readStream.format("rate").option("continuous", "true").load().select('value) + test("repeatedly restart") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) - // TODO: validate against low trigger interval - testStream(df)( - StartStream(Trigger.Continuous("1 second")), - Execute(_ => Thread.sleep(3000)), + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), CheckAnswer(scala.Range(0, 10): _*), StopStream, - StartStream(Trigger.Continuous("1 second")), + StartStream(longContinuousTrigger), StopStream, - StartStream(Trigger.Continuous("1 second")), + StartStream(longContinuousTrigger), StopStream, - StartStream(Trigger.Continuous("1 second")), - Execute(_ => Thread.sleep(3000)), + StartStream(longContinuousTrigger), + AwaitEpoch(2), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), CheckAnswer(scala.Range(0, 20): _*)) - } */ + } test("query without test harness") { val df = spark.readStream From 38e989bb94e3574ddd37320b8dae7a09e66f5538 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 14:02:52 -0800 Subject: [PATCH 10/50] fix some simple TODOs --- .../spark/sql/execution/streaming/StreamingRelation.scala | 4 ++-- .../streaming/continuous/ContinuousDataSourceRDDIter.scala | 2 +- .../execution/streaming/continuous/ContinuousExecution.scala | 5 ++--- .../org/apache/spark/sql/streaming/DataStreamReader.scala | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala index 69d2dd061e5bd..d848d2a38887a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala @@ -85,14 +85,14 @@ case class StreamingRelationV2( sourceName: String, extraOptions: Map[String, String], output: Seq[Attribute], - v1DataSource: DataSource) + v1DataSource: DataSource)(session: SparkSession) extends LeafNode { override def isStreaming: Boolean = true override def toString: String = sourceName // TODO: can we get the conf here somehow? override def computeStats(): Statistics = Statistics( - sizeInBytes = 0 + sizeInBytes = BigInt(session.sessionState.conf.defaultSizeInBytes) ) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 98bbce6e7c6ff..e49f02cbcb35d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -121,8 +121,8 @@ class EpochPollThread( private var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong override def run(): Unit = { - // TODO parameterize try { + // TODO parameterize processing time ProcessingTimeExecutor(ProcessingTime(100), new SystemClock()) .execute { () => val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 0955ff7555ab3..16abe4316f604 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -118,10 +118,9 @@ class ContinuousExecution( throw new IllegalStateException( s"Batch $latestEpochId was committed without next epoch offsets!") } - // TODO initialize committed offsets + committedOffsets = nextOffsets.toStreamProgress(sources) - logDebug(s"Resuming at epoch $currentBatchId with committed offsets " + - s"$committedOffsets and available offsets $availableOffsets") + logDebug(s"Resuming at epoch $currentBatchId with committed offsets $committedOffsets") nextOffsets case None => // We are starting this stream for the first time. Offsets are all None. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 4ec422b0f9745..0a48479993450 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -178,7 +178,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo sparkSession, StreamingRelationV2( s, source, extraOptions.toMap, - tempReader.readSchema().toAttributes, v1DataSource)) + tempReader.readSchema().toAttributes, v1DataSource)(sparkSession)) case _ => // Code path for data source v1. 
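
A note on the `(session: SparkSession)` second parameter list given to StreamingRelationV2 above: for a Scala case class, only the first parameter list takes part in the generated equals/hashCode/unapply, so threading the session through this way lets computeStats read the conf without changing plan equality. A tiny illustration with a toy class, not the relation itself:

    object CurriedCaseClassSketch extends App {
      case class Rel(sourceName: String)(val sessionTag: String)

      val a = Rel("rate")("session-A")
      val b = Rel("rate")("session-B")

      assert(a == b)                       // equal: the second list is ignored by equals/hashCode
      assert(a.sessionTag != b.sessionTag) // but each instance still carries its own session
    }
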
Dataset.ofRows(sparkSession, StreamingRelation(v1DataSource)) From 9e031f5091bac943541fdee61c81cd462d0b1de2 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 14:11:18 -0800 Subject: [PATCH 11/50] use runId instead of queryId for endpoint name --- .../datasources/v2/DataSourceV2ScanExec.scala | 4 ++-- .../datasources/v2/WriteToDataSourceV2.scala | 6 +++--- .../continuous/ContinuousDataSourceRDDIter.scala | 4 ++-- .../streaming/continuous/ContinuousExecution.scala | 12 +++++++----- .../streaming/continuous/EpochCoordinator.scala | 12 +++++------- .../org/apache/spark/sql/streaming/StreamTest.scala | 2 +- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala index 52733ad104efe..10fca4aac7c41 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.LeafExecNode import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.execution.streaming.StreamExecution -import org.apache.spark.sql.execution.streaming.continuous.{ContinuousDataSourceRDD, EpochCoordinatorRef, SetReaderPartitions} +import org.apache.spark.sql.execution.streaming.continuous.{ContinuousDataSourceRDD, ContinuousExecution, EpochCoordinatorRef, SetReaderPartitions} import org.apache.spark.sql.sources.v2.reader._ import org.apache.spark.sql.types.StructType @@ -57,7 +57,7 @@ case class DataSourceV2ScanExec( val inputRDD = reader match { case _: ContinuousReader => EpochCoordinatorRef.get( - sparkContext.getLocalProperty(StreamExecution.QUERY_ID_KEY), sparkContext.env) + sparkContext.getLocalProperty(ContinuousExecution.RUN_ID_KEY), sparkContext.env) .askSync[Unit](SetReaderPartitions(readTasks.size())) new ContinuousDataSourceRDD(sparkContext, readTasks) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala index 2474f7d45308b..4c934a194766c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala @@ -63,7 +63,7 @@ case class WriteToDataSourceV2Exec(writer: DataSourceV2Writer, query: SparkPlan) val runTask = writer match { case w: ContinuousWriter => EpochCoordinatorRef.get( - sparkContext.getLocalProperty(StreamExecution.QUERY_ID_KEY), sparkContext.env) + sparkContext.getLocalProperty(ContinuousExecution.RUN_ID_KEY), sparkContext.env) .askSync[Unit](SetWriterPartitions(rdd.getNumPartitions)) (context: TaskContext, iter: Iterator[InternalRow]) => @@ -129,7 +129,7 @@ object DataWritingSparkTask extends Logging { context: TaskContext, iter: Iterator[InternalRow]): WriterCommitMessage = { val dataWriter = writeTask.createDataWriter(context.partitionId(), context.attemptNumber()) - val queryId = context.getLocalProperty(StreamExecution.QUERY_ID_KEY) + val runId = context.getLocalProperty(ContinuousExecution.RUN_ID_KEY) val currentMsg: WriterCommitMessage = null var currentEpoch = 
context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong @@ -141,7 +141,7 @@ object DataWritingSparkTask extends Logging { logInfo(s"Writer for partition ${context.partitionId()} is committing.") val msg = dataWriter.commit() logInfo(s"Writer for partition ${context.partitionId()} committed.") - EpochCoordinatorRef.get(queryId, SparkEnv.get).send( + EpochCoordinatorRef.get(runId, SparkEnv.get).send( CommitPartitionEpoch(context.partitionId(), currentEpoch, msg) ) currentEpoch += 1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index e49f02cbcb35d..58635a3117492 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -59,7 +59,7 @@ class ContinuousDataSourceRDD( val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](1024) val epochEndpoint = EpochCoordinatorRef.get( - context.getLocalProperty(StreamExecution.QUERY_ID_KEY), SparkEnv.get) + context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), SparkEnv.get) val itr = new Iterator[UnsafeRow] { private var currentRow: UnsafeRow = _ private var currentOffset: PartitionOffset = @@ -117,7 +117,7 @@ class EpochPollThread( context: TaskContext) extends Thread with Logging { private val epochEndpoint = EpochCoordinatorRef.get( - context.getLocalProperty(StreamExecution.QUERY_ID_KEY), SparkEnv.get) + context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), SparkEnv.get) private var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong override def run(): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 16abe4316f604..8863fdec65e9f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -208,13 +208,14 @@ class ContinuousExecution( sparkSession.sparkContext.setLocalProperty( ContinuousExecution.START_EPOCH_KEY, currentBatchId.toString) + sparkSession.sparkContext.setLocalProperty( + ContinuousExecution.RUN_ID_KEY, runId.toString) - // Use the parent Spark session since it's where this query is registered. - // TODO: we should use runId for the endpoint to be safe against cross-contamination - // from failed runs + // Use the parent Spark session for the endpoint since it's where this query ID is registered. 
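
The switch from queryId to runId above gives each run a fresh coordinator endpoint, so a task left over from a failed run can never reach the new run's coordinator; the runId travels to executors as a job-local property. A sketch of that handoff using public APIs (the property key mirrors the patch's RUN_ID_KEY, everything else is illustrative):

    import java.util.UUID
    import org.apache.spark.{SparkContext, TaskContext}

    object RunIdHandoffSketch {
      private val RunIdKey = "__run_id"

      // Driver side: tag every job this thread submits with the current run's id.
      def tagJobs(sc: SparkContext, runId: UUID): Unit =
        sc.setLocalProperty(RunIdKey, runId.toString)

      // Executor side: a running task looks the tag up to find its run's coordinator endpoint.
      def coordinatorEndpointName(): String =
        s"EpochCoordinator-${TaskContext.get().getLocalProperty(RunIdKey)}"
    }
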
val epochEndpoint = - EpochCoordinatorRef.create( - writer.get(), reader, currentBatchId, id.toString, sparkSession, SparkEnv.get) + EpochCoordinatorRef.create( + writer.get(), reader, currentBatchId, + id.toString, runId.toString, sparkSession, SparkEnv.get) val epochUpdateThread = new Thread(new Runnable { override def run: Unit = { try { @@ -321,4 +322,5 @@ class ContinuousExecution( object ContinuousExecution { val START_EPOCH_KEY = "__continuous_start_epoch" + val RUN_ID_KEY = "__run_id" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index 0bea6030e9ac0..5e7b177fea7ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -51,10 +51,7 @@ case class IncrementAndGetEpoch() /** Helper object used to create reference to [[EpochCoordinator]]. */ object EpochCoordinatorRef extends Logging { - - private val endpointNamePrefix = "EpochCoordinator-" - - private def endpointName(queryId: String) = s"EpochCoordinator-$queryId" + private def endpointName(runId: String) = s"EpochCoordinator-$runId" /** * Create a reference to a new [[EpochCoordinator]]. @@ -64,16 +61,17 @@ object EpochCoordinatorRef extends Logging { reader: ContinuousReader, startEpoch: Long, queryId: String, + runId: String, session: SparkSession, env: SparkEnv): RpcEndpointRef = synchronized { val coordinator = new EpochCoordinator(writer, reader, startEpoch, queryId, session, env.rpcEnv) - val ref = env.rpcEnv.setupEndpoint(endpointName(queryId), coordinator) + val ref = env.rpcEnv.setupEndpoint(endpointName(runId), coordinator) logInfo("Registered EpochCoordinator endpoint") ref } - def get(queryId: String, env: SparkEnv): RpcEndpointRef = synchronized { - val rpcEndpointRef = RpcUtils.makeDriverRef(endpointName(queryId), env.conf, env.rpcEnv) + def get(runId: String, env: SparkEnv): RpcEndpointRef = synchronized { + val rpcEndpointRef = RpcUtils.makeDriverRef(endpointName(runId), env.conf, env.rpcEnv) logDebug("Retrieved existing EpochCoordinator endpoint") rpcEndpointRef } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index bb5a20dc14c98..45681122744c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -251,7 +251,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be def apply(): AssertOnQuery = Execute { case s: ContinuousExecution => - val newEpoch = EpochCoordinatorRef.get(s.id.toString, SparkEnv.get) + val newEpoch = EpochCoordinatorRef.get(s.runId.toString, SparkEnv.get) .askSync[Long](IncrementAndGetEpoch()) s.awaitEpoch(newEpoch - 1) case _ => throw new IllegalStateException("microbatch cannot increment epoch") From 690eadce245f5a16f5059e07da1e4a9022028fde Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 14:19:03 -0800 Subject: [PATCH 12/50] more simple todos --- .../streaming/continuous/ContinuousExecution.scala | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 8863fdec65e9f..c3578cf237b5c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -165,8 +165,7 @@ class ContinuousExecution( s"Invalid reader: ${Utils.truncatedString(output, ",")} != " + s"${Utils.truncatedString(newOutput, ",")}") replacements ++= output.zip(newOutput) - - // TODO multiple sources maybe? offsets(0) has to be changed to track source id + reader.setOffset(java.util.Optional.ofNullable(offsets.offsets(0).orNull)) DataSourceV2Relation(newOutput, reader) } @@ -176,11 +175,9 @@ class ContinuousExecution( val triggerLogicalPlan = withNewSources transformAllExpressions { case a: Attribute if replacementMap.contains(a) => replacementMap(a).withMetadata(a.metadata) - // TODO properly handle timestamp - case ct: CurrentTimestamp => - CurrentBatchTimestamp(0, ct.dataType) - case cd: CurrentDate => - CurrentBatchTimestamp(0, cd.dataType, cd.timeZoneId) + case (_: CurrentTimestamp | _: CurrentDate) => + throw new IllegalStateException( + "CurrentTimestamp and CurrentDate not yet supported for continuous processing") } val writer = sink.createContinuousWriter( From 4df4f0406347c77345c6e89241cf8e646e933213 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 14:27:12 -0800 Subject: [PATCH 13/50] remove old state --- .../execution/streaming/StreamingRelation.scala | 1 - .../streaming/continuous/ContinuousExecution.scala | 2 +- .../streaming/continuous/EpochCoordinator.scala | 14 +++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala index d848d2a38887a..16cd1f52703cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala @@ -90,7 +90,6 @@ case class StreamingRelationV2( override def isStreaming: Boolean = true override def toString: String = sourceName - // TODO: can we get the conf here somehow? 
override def computeStats(): Statistics = Statistics( sizeInBytes = BigInt(session.sessionState.conf.defaultSizeInBytes) ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index c3578cf237b5c..a50c0f5cb9628 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -165,7 +165,7 @@ class ContinuousExecution( s"Invalid reader: ${Utils.truncatedString(output, ",")} != " + s"${Utils.truncatedString(newOutput, ",")}") replacements ++= output.zip(newOutput) - + reader.setOffset(java.util.Optional.ofNullable(offsets.offsets(0).orNull)) DataSourceV2Relation(newOutput, reader) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index 5e7b177fea7ae..b302f52a6aa19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -95,7 +95,7 @@ class EpochCoordinator( // (epoch, partition) -> message // This is small enough that we don't worry too much about optimizing the shape of the structure. private val partitionCommits = - mutable.Map[(Long, Int), WriterCommitMessage]() + mutable.Map[(Long, Int), WriterCommitMessage]() private val partitionOffsets = mutable.Map[(Long, Int), PartitionOffset]() @@ -111,10 +111,18 @@ class EpochCoordinator( logDebug(s"Epoch $epoch has received commits from all partitions. Committing globally.") val query = session.streams.get(queryId).asInstanceOf[StreamingQueryWrapper] .streamingQuery.asInstanceOf[ContinuousExecution] - // Sequencing is important - writer commits to epoch are required to be replayable + // Sequencing is important here. We must commit to the writer before recording the commit + // in the query, or we will end up dropping the commit if we restart in the middle. writer.commit(epoch, thisEpochCommits.toArray) query.commit(epoch) - // TODO: cleanup unnecessary state + + // Cleanup state from before this epoch, now that we know all partitions are forever past it. 
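      // For example, once epoch 5 commits, any entries still keyed by epochs 0 through 4 can never
      // be needed again, so they are dropped by the loops below.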
+ for (k <- partitionCommits.keys.filter { case (e, _) => e < epoch }) { + partitionCommits.remove(k) + } + for (k <- partitionOffsets.keys.filter { case (e, _) => e < epoch }) { + partitionOffsets.remove(k) + } } } From 67013103f2265cd063304299a08f5856d3363b6c Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 14:35:51 -0800 Subject: [PATCH 14/50] remove clean shutdown workaround in StreamTest --- .../sql/execution/datasources/v2/WriteToDataSourceV2.scala | 2 ++ .../execution/streaming/continuous/ContinuousExecution.scala | 2 +- .../scala/org/apache/spark/sql/streaming/StreamTest.scala | 5 ++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala index 4c934a194766c..aedc09d121cac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala @@ -84,6 +84,8 @@ case class WriteToDataSourceV2Exec(writer: DataSourceV2Writer, query: SparkPlan) writer.commit(messages) logInfo(s"Data source writer $writer committed.") } catch { + case _: InterruptedException if writer.isInstanceOf[ContinuousWriter] => + // Interruption is how continuous queries are ended, so accept and ignore the exception. case cause: Throwable => logError(s"Data source writer $writer is aborting.") try { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index a50c0f5cb9628..48248d388a614 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -85,7 +85,7 @@ class ContinuousExecution( // swallow exception and run again state.set(ACTIVE) } - } while (true) + } while (state.get() == ACTIVE) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 45681122744c8..1c589bbdffb83 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -497,10 +497,9 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be s"microbatch thread not stopped") verify(!currentStream.isActive, "query.isActive() is false even after stopping") - // TODO these shouldn't be reported in the first place - /* verify(currentStream.exception.isEmpty, + verify(currentStream.exception.isEmpty, s"query.exception() is not empty after clean stop: " + - currentStream.exception.map(_.toString()).getOrElse("")) */ + currentStream.exception.map(_.toString()).getOrElse("")) } catch { case _: InterruptedException => case e: org.scalatest.exceptions.TestFailedDueToTimeoutException => From 1cab58f36f19cc0893fe351365a92b3dc5731b44 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 14:50:09 -0800 Subject: [PATCH 15/50] update ContinuousExecution docs --- .../streaming/StreamingRelation.scala | 9 +++++ .../continuous/ContinuousExecution.scala | 33 ++++++++----------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala index 16cd1f52703cd..0ca2e7854d94b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala @@ -80,6 +80,12 @@ case class StreamingExecutionRelation( // continuous processing (which is always V2) but only has V1 microbatch support. We don't // know at read time whether the query is conntinuous or not, so we need to be able to // swap a V1 relation back in. +/** + * Used to link a [[DataSourceV2]] into a streaming + * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. This is only used for creating + * a streaming [[org.apache.spark.sql.DataFrame]] from [[org.apache.spark.sql.DataFrameReader]], + * and should be converted before passing to [[StreamExecution]]. + */ case class StreamingRelationV2( dataSource: DataSourceV2, sourceName: String, @@ -95,6 +101,9 @@ case class StreamingRelationV2( ) } +/** + * Used to link a [[DataSourceV2]] into a continuous processing execution. + */ case class ContinuousExecutionRelation( source: ContinuousReadSupport, extraOptions: Map[String, String], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 48248d388a614..16ff14be83b9c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -79,7 +79,7 @@ class ContinuousExecution( override protected def runActivatedStream(sparkSessionForStream: SparkSession): Unit = { do { try { - runFromOffsets(sparkSessionForStream) + runContinuous(sparkSessionForStream) } catch { case _: Throwable if state.get().equals(RECONFIGURING) => // swallow exception and run again @@ -94,20 +94,16 @@ class ContinuousExecution( * before any processing occurs and will populate the following fields: * - currentBatchId * - committedOffsets - * - availableOffsets * The basic structure of this method is as follows: * - * Identify (from the offset log) the offsets used to run the last batch - * IF last batch exists THEN - * Set the next batch to be executed as the last recovered batch - * Check the commit log to see which batch was committed last - * IF the last batch was committed THEN - * Call getBatch using the last batch start and end offsets - * // ^^^^ above line is needed since some sources assume last batch always re-executes - * Setup for a new batch i.e., start = last batch end, and identify new end + * Identify (from the commit log) the latest epoch that has committed + * IF last epoch exists THEN + * Set the next epoch to run as the next epoch after the last commit + * Get the offsets for that epoch from the offset log (guaranteed by invariant to be there) + * Note that offsets up to those are committed * DONE * ELSE - * Identify a brand new batch + * Start a new query log * DONE */ private def getStartOffsets(sparkSessionToRunBatches: SparkSession): OffsetSeq = { @@ -131,10 +127,10 @@ class ContinuousExecution( } /** - * Processes any data available between `availableOffsets` and `committedOffsets`. 
- * @param sparkSessionToRunBatch Isolated [[SparkSession]] to run this batch with. + * Do a continuous run. + * @param sparkSessionForQuery Isolated [[SparkSession]] to run the continuous query with. */ - private def runFromOffsets(sparkSessionToRunBatch: SparkSession): Unit = { + private def runContinuous(sparkSessionForQuery: SparkSession): Unit = { import scala.collection.JavaConverters._ // A list of attributes that will need to be updated. val replacements = new ArrayBuffer[(Attribute, Attribute)] @@ -152,7 +148,7 @@ class ContinuousExecution( } uniqueSources = continuousSources.distinct - val offsets = getStartOffsets(sparkSessionToRunBatch) + val offsets = getStartOffsets(sparkSessionForQuery) var insertedSourceId = 0 val withNewSources = logicalPlan transform { @@ -193,7 +189,7 @@ class ContinuousExecution( reportTimeTaken("queryPlanning") { lastExecution = new IncrementalExecution( - sparkSessionToRunBatch, + sparkSessionForQuery, withSink, outputMode, checkpointFile("state"), @@ -250,7 +246,7 @@ class ContinuousExecution( reportTimeTaken("runContinuous") { SQLExecution.withNewExecutionId( - sparkSessionToRunBatch, lastExecution)(lastExecution.toRdd) + sparkSessionForQuery, lastExecution)(lastExecution.toRdd) } } finally { SparkEnv.get.rpcEnv.stop(epochEndpoint) @@ -291,7 +287,7 @@ class ContinuousExecution( } /** - * Blocks the current thread until execution has committed past the specified epoch. + * Blocks the current thread until execution has committed at or after the specified epoch. */ private[sql] def awaitEpoch(epoch: Long): Unit = { def notDone = { @@ -316,7 +312,6 @@ class ContinuousExecution( } } - object ContinuousExecution { val START_EPOCH_KEY = "__continuous_start_epoch" val RUN_ID_KEY = "__run_id" From 1c11b7c3df7bea49e955278bbce154df73024d72 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 14:59:14 -0800 Subject: [PATCH 16/50] add comments to EpochCoordinator --- .../continuous/EpochCoordinator.scala | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index b302f52a6aa19..f846016f4e130 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -30,23 +30,41 @@ import org.apache.spark.sql.sources.v2.reader.{ContinuousReader, PartitionOffset import org.apache.spark.sql.sources.v2.writer.{ContinuousWriter, WriterCommitMessage} import org.apache.spark.util.RpcUtils +private[continuous] sealed trait EpochCoordinatorMessage extends Serializable + +// Driver epoch trigger message +/** + * Atomically increment the current epoch and get the new value. + */ +case class IncrementAndGetEpoch() extends EpochCoordinatorMessage + +// Init messages +/** + * Set the reader and writer partition counts. Tasks may not be started until the coordinator + * has acknowledged these messages. + */ +case class SetReaderPartitions(numPartitions: Int) extends EpochCoordinatorMessage +case class SetWriterPartitions(numPartitions: Int) extends EpochCoordinatorMessage + +// Partition task messages +/** + * Get the current epoch. + */ +case class GetCurrentEpoch() extends EpochCoordinatorMessage +/** + * Commit a partition at the specified epoch with the given message. 
+ */ case class CommitPartitionEpoch( partitionId: Int, epoch: Long, - message: WriterCommitMessage) - -case class GetCurrentEpoch() - + message: WriterCommitMessage) extends EpochCoordinatorMessage +/** + * Report that a partition is starting the specified epoch at the specified offset. + */ case class ReportPartitionOffset( partitionId: Int, epoch: Long, - offset: PartitionOffset) - -case class SetReaderPartitions(numPartitions: Int) -case class SetWriterPartitions(numPartitions: Int) - -// Should be used only by ContinuousExecution during epoch advancement. -case class IncrementAndGetEpoch() + offset: PartitionOffset) extends EpochCoordinatorMessage /** Helper object used to create reference to [[EpochCoordinator]]. */ From c6a580c31b8ead580085ddf7ac5f8e925f1e80ea Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 16:02:54 -0800 Subject: [PATCH 17/50] change offset semantic to end of previous epoch --- .../ContinuousDataSourceRDDIter.scala | 2 +- .../continuous/ContinuousExecution.scala | 27 ++++++++++++++----- .../continuous/EpochCoordinator.scala | 6 ++--- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 58635a3117492..b675a3a003685 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -72,7 +72,7 @@ class ContinuousDataSourceRDD( if (currentRow == null) { epochEndpoint.send(ReportPartitionOffset( context.partitionId(), - newOffset.asInstanceOf[EpochPackedPartitionOffset].epoch, + newOffset.asInstanceOf[EpochPackedPartitionOffset].epoch - 1, currentOffset)) false } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 16ff14be83b9c..7a61ac397d5ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -98,24 +98,29 @@ class ContinuousExecution( * * Identify (from the commit log) the latest epoch that has committed * IF last epoch exists THEN - * Set the next epoch to run as the next epoch after the last commit - * Get the offsets for that epoch from the offset log (guaranteed by invariant to be there) - * Note that offsets up to those are committed + * Get end offsets for the epoch + * Set those offsets as the current commit progress + * Set the next epoch ID as the last + 1 + * Return the end offsets of the last epoch as start for the next one * DONE * ELSE * Start a new query log * DONE */ private def getStartOffsets(sparkSessionToRunBatches: SparkSession): OffsetSeq = { + // Note that this will need a slight modification for exactly once. If ending offsets were + // reported but not committed for any epochs, we must replay exactly to those offsets. + // For at least once, we can just ignore those reports and risk duplicates. 
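    // Worked example of the branch below: if the commit log's latest entry is epoch 4, the end
    // offsets recorded for epoch 4 become committedOffsets and execution resumes at epoch 5;
    // anything epoch 5 had already reported is recomputed, at the cost of possible duplicates.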
batchCommitLog.getLatest() match { case Some((latestEpochId, _)) => - currentBatchId = latestEpochId + 1 - val nextOffsets = offsetLog.get(currentBatchId).getOrElse { + val nextOffsets = offsetLog.get(latestEpochId).getOrElse { throw new IllegalStateException( - s"Batch $latestEpochId was committed without next epoch offsets!") + s"Batch $latestEpochId was committed without end epoch offsets!") } committedOffsets = nextOffsets.toStreamProgress(sources) + + currentBatchId = latestEpochId + 1 logDebug(s"Resuming at epoch $currentBatchId with committed offsets $committedOffsets") nextOffsets case None => @@ -256,6 +261,9 @@ class ContinuousExecution( } } + /** + * Report ending partition offsets for the given reader at the given epoch. + */ def addOffset( epoch: Long, reader: ContinuousReader, partitionOffsets: Seq[PartitionOffset]): Unit = { assert(continuousSources.length == 1, "only one continuous source supported currently") @@ -270,11 +278,16 @@ class ContinuousExecution( } } + /** + * Mark the specified epoch as committed. All readers must have reported end offsets for the epoch + * before this is called. + */ def commit(epoch: Long): Unit = { assert(continuousSources.length == 1, "only one continuous source supported currently") + assert(offsetLog.get(epoch).isDefined, s"offset for epoch $epoch not reported before commit") synchronized { batchCommitLog.add(epoch) - val offset = offsetLog.get(epoch + 1).get.offsets(0).get + val offset = offsetLog.get(epoch).get.offsets(0).get committedOffsets ++= Seq(continuousSources(0) -> offset) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index f846016f4e130..c91b0546aa43f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -59,7 +59,7 @@ case class CommitPartitionEpoch( epoch: Long, message: WriterCommitMessage) extends EpochCoordinatorMessage /** - * Report that a partition is starting the specified epoch at the specified offset. + * Report that a partition is ending the specified epoch at the specified offset. 
*/ case class ReportPartitionOffset( partitionId: Int, @@ -122,7 +122,7 @@ class EpochCoordinator( val thisEpochCommits = partitionCommits.collect { case ((e, _), msg) if e == epoch => msg } val nextEpochOffsets = - partitionOffsets.collect { case ((e, _), o) if e == epoch + 1 => o } + partitionOffsets.collect { case ((e, _), o) if e == epoch => o } if (thisEpochCommits.size == numWriterPartitions && nextEpochOffsets.size == numReaderPartitions) { @@ -161,7 +161,7 @@ class EpochCoordinator( if (thisEpochOffsets.size == numReaderPartitions) { logDebug(s"Epoch $epoch has offsets reported from all partitions: $thisEpochOffsets") query.addOffset(epoch, reader, thisEpochOffsets.toSeq) - resolveCommitsAtEpoch(epoch - 1) + resolveCommitsAtEpoch(epoch) } } From a76987a74fceba10ffa4be1f9f19c38735c62327 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 16:18:40 -0800 Subject: [PATCH 18/50] document EpochCoordinator --- .../continuous/EpochCoordinator.scala | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index c91b0546aa43f..3afe6bb417415 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -36,39 +36,39 @@ private[continuous] sealed trait EpochCoordinatorMessage extends Serializable /** * Atomically increment the current epoch and get the new value. */ -case class IncrementAndGetEpoch() extends EpochCoordinatorMessage +private[sql] case class IncrementAndGetEpoch() extends EpochCoordinatorMessage // Init messages /** * Set the reader and writer partition counts. Tasks may not be started until the coordinator * has acknowledged these messages. */ -case class SetReaderPartitions(numPartitions: Int) extends EpochCoordinatorMessage +private[sql] case class SetReaderPartitions(numPartitions: Int) extends EpochCoordinatorMessage case class SetWriterPartitions(numPartitions: Int) extends EpochCoordinatorMessage // Partition task messages /** * Get the current epoch. */ -case class GetCurrentEpoch() extends EpochCoordinatorMessage +private[sql] case class GetCurrentEpoch() extends EpochCoordinatorMessage /** * Commit a partition at the specified epoch with the given message. */ -case class CommitPartitionEpoch( +private[sql] case class CommitPartitionEpoch( partitionId: Int, epoch: Long, message: WriterCommitMessage) extends EpochCoordinatorMessage /** * Report that a partition is ending the specified epoch at the specified offset. */ -case class ReportPartitionOffset( +private[sql] case class ReportPartitionOffset( partitionId: Int, epoch: Long, offset: PartitionOffset) extends EpochCoordinatorMessage /** Helper object used to create reference to [[EpochCoordinator]]. */ -object EpochCoordinatorRef extends Logging { +private[sql] object EpochCoordinatorRef extends Logging { private def endpointName(runId: String) = s"EpochCoordinator-$runId" /** @@ -95,7 +95,18 @@ object EpochCoordinatorRef extends Logging { } } -class EpochCoordinator( +/** + * Handles three major epoch coordination tasks for continuous processing: + * + * * Maintains a local epoch counter (the "driver epoch"), incremented by IncrementAndGetEpoch + * and pollable from executors by GetCurrentEpoch. 
Note that this epoch is *not* immediately + * reflected anywhere in ContinuousExecution. + * * Collates ReportPartitionOffset messages, and forwards to ContinuousExecution when all + * readers have ended a given epoch. + * * Collates CommitPartitionEpoch messages, and forwards to ContinuousExecution when all readers + * have both committed and reported an end offset for a given epoch. + */ +private[continuous] class EpochCoordinator( writer: ContinuousWriter, reader: ContinuousReader, startEpoch: Long, @@ -107,14 +118,12 @@ class EpochCoordinator( private var numReaderPartitions: Int = _ private var numWriterPartitions: Int = _ - // Should only be mutated by this coordinator's subthread. private var currentDriverEpoch = startEpoch // (epoch, partition) -> message - // This is small enough that we don't worry too much about optimizing the shape of the structure. private val partitionCommits = mutable.Map[(Long, Int), WriterCommitMessage]() - + // (epoch, partition) -> offset private val partitionOffsets = mutable.Map[(Long, Int), PartitionOffset]() From 1d87302ea196bf6bb3eb35181824c94ca7dbadab Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 11 Dec 2017 18:06:44 -0800 Subject: [PATCH 19/50] simplify epoch handling --- .../ContinuousDataSourceRDDIter.scala | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index b675a3a003685..b78612b0b3377 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -35,12 +35,6 @@ import org.apache.spark.sql.sources.v2.reader._ import org.apache.spark.sql.streaming.ProcessingTime import org.apache.spark.util.SystemClock -// There are two types of entry possible: -// (row, offset, null) - a row with offset -// (null, null, epoch) - an epoch marker -// We force both into the same queue to avoid having to synchronize across multiple queues. -case class ReadQueueEntry(row: UnsafeRow, offset: PartitionOffset, epoch: Long) - class ContinuousDataSourceRDD( sc: SparkContext, @transient private val readTasks: java.util.List[ReadTask[UnsafeRow]]) @@ -56,6 +50,7 @@ class ContinuousDataSourceRDD( val reader = split.asInstanceOf[DataSourceRDDPartition].readTask.createDataReader() // TODO: capacity option + // (null, null) is an allowed input to the queue, representing an epoch boundary. 
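    // Concretely: the DataReaderThread enqueues (row, offset) pairs as rows arrive, the
    // EpochPollThread enqueues one (null, null) marker per driver epoch increment, and the
    // iterator below reports the partition's current offset and ends the epoch on each marker.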
val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](1024) val epochEndpoint = EpochCoordinatorRef.get( @@ -64,20 +59,24 @@ class ContinuousDataSourceRDD( private var currentRow: UnsafeRow = _ private var currentOffset: PartitionOffset = ContinuousDataSourceRDD.getBaseReader(reader).getOffset + private var currentEpoch = + context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong override def hasNext(): Boolean = { - val newTuple = queue.take() - val newOffset = newTuple._2 - currentRow = newTuple._1 - if (currentRow == null) { - epochEndpoint.send(ReportPartitionOffset( - context.partitionId(), - newOffset.asInstanceOf[EpochPackedPartitionOffset].epoch - 1, - currentOffset)) - false - } else { - currentOffset = newOffset - true + queue.take() match { + // epoch boundary marker + case (null, null) => + epochEndpoint.send(ReportPartitionOffset( + context.partitionId(), + currentEpoch, + currentOffset)) + currentEpoch += 1 + false + // real row + case (row, offset) => + currentRow = row + currentOffset = offset + true } } @@ -127,7 +126,7 @@ class EpochPollThread( .execute { () => val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch()) for (i <- currentEpoch to newEpoch - 1) { - queue.put((null, EpochPackedPartitionOffset(i + 1))) + queue.put((null, null)) logDebug(s"Sent marker to start epoch ${i + 1}") } currentEpoch = newEpoch @@ -138,7 +137,6 @@ class EpochPollThread( // Continuous shutdown might interrupt us, or it might clean up the endpoint before // interrupting us. Unfortunately, a missing endpoint just throws a generic SparkException. // In either case, as long as the context shows interrupted, we can safely clean shutdown. - return } } } From 89b9ee6440cad51bbc49a49a97d3754699c6c4c4 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Tue, 12 Dec 2017 11:17:58 -0800 Subject: [PATCH 20/50] stress tests --- .../ContinuousDataSourceRDDIter.scala | 2 +- .../continuous/ContinuousSuite.scala | 111 +++++++++++++++++- 2 files changed, 108 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index b78612b0b3377..804f21e8cfd49 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -154,7 +154,7 @@ class DataReaderThread( "Continuous reader reported no remaining elements! 
Reader should have blocked waiting.") } - queue.put((reader.get(), baseReader.getOffset)) + queue.put((reader.get().copy(), baseReader.getOffset)) } } catch { case _: InterruptedException if context.isInterrupted() => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 122bc2213cdfa..0239db8ca4143 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -65,8 +65,9 @@ class ContinuousSuite extends StreamTest { case DataSourceV2ScanExec(_, r: ContinuousRateStreamReader) => r }.get - val deltaMs = (numTriggers - 1) * 1000 + 300 + assert(reader.lastStartTime != 0, "reader last start time not initialized yet") + val deltaMs = (numTriggers - 1) * 1000 + 300 while (System.currentTimeMillis < reader.lastStartTime + deltaMs) { Thread.sleep(reader.lastStartTime + deltaMs - System.currentTimeMillis) } @@ -96,7 +97,8 @@ class ContinuousSuite extends StreamTest { AwaitEpoch(2), Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 20): _*)) + CheckAnswer(scala.Range(0, 20): _*), + StopStream) } test("repeatedly restart") { @@ -122,7 +124,31 @@ class ContinuousSuite extends StreamTest { AwaitEpoch(2), Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 20): _*)) + CheckAnswer(scala.Range(0, 20): _*), + StopStream) + } + + test("rate latency") { + import org.apache.spark.sql.functions.udf + val current_timestamp = udf(() => System.currentTimeMillis()) + val df = spark.readStream + .format("rate") + .option("numPartitions", "2") + .option("rowsPerSecond", "2") + .load() + .select('timestamp.cast("long") as 'ingest, 'value, current_timestamp() as 'processing) + val query = df.writeStream + .format("memory") + .queryName("latency") + .trigger(Trigger.Continuous(100)) + .start() + val continuousExecution = + query.asInstanceOf[StreamingQueryWrapper].streamingQuery.asInstanceOf[ContinuousExecution] + continuousExecution.awaitEpoch(0) + waitForRateSourceTriggers(continuousExecution, 2) + query.stop() + + print(spark.read.table("latency").collect().mkString) } test("query without test harness") { @@ -137,10 +163,87 @@ class ContinuousSuite extends StreamTest { .queryName("noharness") .trigger(Trigger.Continuous(100)) .start() - waitForRateSourceTriggers(query.asInstanceOf[StreamingQueryWrapper].streamingQuery, 2) + val continuousExecution = + query.asInstanceOf[StreamingQueryWrapper].streamingQuery.asInstanceOf[ContinuousExecution] + continuousExecution.awaitEpoch(0) + waitForRateSourceTriggers(continuousExecution, 2) query.stop() val results = spark.read.table("noharness").collect() assert(results.toSet == Set(0, 1, 2, 3).map(Row(_))) } } + +class ContinuousStressSuite extends StreamTest { + + import testImplicits._ + + // We need more than the default local[2] to be able to schedule all partitions simultaneously. 
+ override protected def createSparkSession = new TestSparkSession( + new SparkContext( + "local[10]", + "continuous-stream-test-sql-context", + sparkConf.set("spark.sql.testkey", "true"))) + + private def waitForRateSourceTriggers(query: StreamExecution, numTriggers: Int): Unit = { + query match { + case s: ContinuousExecution => + assert(numTriggers >= 2, "must wait for at least 2 triggers to ensure query is initialized") + val reader = s.lastExecution.executedPlan.collectFirst { + case DataSourceV2ScanExec(_, r: ContinuousRateStreamReader) => r + }.get + + assert(reader.lastStartTime != 0, "reader last start time not initialized yet") + + val deltaMs = (numTriggers - 1) * 1000 + 300 + while (System.currentTimeMillis < reader.lastStartTime + deltaMs) { + Thread.sleep(reader.lastStartTime + deltaMs - System.currentTimeMillis) + } + } + } + + // A continuous trigger that will only fire the initial time for the duration of a test. + // This allows clean testing with manual epoch advancement. + private val longContinuousTrigger = Trigger.Continuous("1 hour") + + test("only one epoch") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "1") + .option("rowsPerSecond", "100") + .load() + .select('value) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 200)), + IncrementEpoch(), + CheckAnswer(scala.Range(0, 20000): _*)) + } + + test("automatic epoch advancement") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "1") + .option("rowsPerSecond", "100") + .load() + .select('value) + + testStream(df, useV2Sink = true)( + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 200)), + IncrementEpoch(), + Execute { query => + // Because we have automatic advancement, we can't reliably guarantee another trigger won't + // commit more than the 20K rows we expect before we can check. 
So we simply ensure that: + // * the highest value committed was at least 20000 - 1 + // * all values below the highest are present + val data = query.sink.asInstanceOf[MemorySinkV2].allData + val max = data.map(_.getLong(0)).max + assert(max >= 19999) + assert(data.toSet == scala.Range(0, max.toInt + 1).map(Row(_)).toSet) + }) + } +} From bdafb15ac1dc323c635b2e3c9efc3d6d67fa89fc Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Tue, 12 Dec 2017 12:17:22 -0800 Subject: [PATCH 21/50] add minBatchesToRetain --- .../execution/streaming/continuous/ContinuousExecution.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 7a61ac397d5ea..9edc19f74dffe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -291,6 +291,11 @@ class ContinuousExecution( committedOffsets ++= Seq(continuousSources(0) -> offset) } + if (minBatchesToRetain < currentBatchId) { + offsetLog.purge(currentBatchId - minBatchesToRetain) + batchCommitLog.purge(currentBatchId - minBatchesToRetain) + } + awaitProgressLock.lock() try { awaitProgressLockCondition.signalAll() From 5ad34a14cfeb89ae517c7ce56bc7bc9171b305be Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Tue, 12 Dec 2017 12:43:33 -0800 Subject: [PATCH 22/50] add confs --- .../apache/spark/sql/internal/SQLConf.scala | 21 +++++++++++++++++++ .../datasources/v2/DataSourceV2ScanExec.scala | 2 +- .../ContinuousDataSourceRDDIter.scala | 17 ++++++++------- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index bdc8d92e84079..60462cedd8c77 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1044,6 +1044,22 @@ object SQLConf { "When this conf is not set, the value from `spark.redaction.string.regex` is used.") .fallbackConf(org.apache.spark.internal.config.STRING_REDACTION_PATTERN) + val CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE = + buildConf("spark.sql.streaming.continuous.executorQueueSize") + .internal() + .doc("The size (measured in number of rows) of the queue used in continuous execution to" + + " buffer the results of a ContinuousDataReader.") + .intConf + .createWithDefault(1024) + + val CONTINUOUS_STREAMING_EXECUTOR_POLL_INTERVAL_MS = + buildConf("spark.sql.streaming.continuous.executorPollIntervalMs") + .internal() + .doc("The interval at which continuous execution readers will poll to check whether" + + " the epoch has advanced on the driver.") + .intConf + .createWithDefault(100) + object Deprecated { val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks" } @@ -1357,6 +1373,11 @@ class SQLConf extends Serializable with Logging { def replaceExceptWithFilter: Boolean = getConf(REPLACE_EXCEPT_WITH_FILTER) + def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE) + + def continuousStreamingExecutorPollIntervalMs: Int = + getConf(CONTINUOUS_STREAMING_EXECUTOR_POLL_INTERVAL_MS) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration 
properties. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala index 10fca4aac7c41..e4fca1b10dfad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala @@ -60,7 +60,7 @@ case class DataSourceV2ScanExec( sparkContext.getLocalProperty(ContinuousExecution.RUN_ID_KEY), sparkContext.env) .askSync[Unit](SetReaderPartitions(readTasks.size())) - new ContinuousDataSourceRDD(sparkContext, readTasks) + new ContinuousDataSourceRDD(sparkContext, sqlContext, readTasks) case _ => new DataSourceRDD(sparkContext, readTasks) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 804f21e8cfd49..c92980bea3065 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -26,7 +26,7 @@ import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.rpc.RpcEndpointRef -import org.apache.spark.sql.Row +import org.apache.spark.sql.{Row, SQLContext} import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.execution.datasources.v2.{DataSourceRDDPartition, RowToUnsafeDataReader} import org.apache.spark.sql.execution.streaming._ @@ -37,9 +37,13 @@ import org.apache.spark.util.SystemClock class ContinuousDataSourceRDD( sc: SparkContext, + sqlContext: SQLContext, @transient private val readTasks: java.util.List[ReadTask[UnsafeRow]]) extends RDD[UnsafeRow](sc, Nil) { + private val dataQueueSize = sqlContext.conf.continuousStreamingExecutorQueueSize + private val epochPollIntervalMs = sqlContext.conf.continuousStreamingExecutorPollIntervalMs + override protected def getPartitions: Array[Partition] = { readTasks.asScala.zipWithIndex.map { case (readTask, index) => new DataSourceRDDPartition(index, readTask) @@ -49,9 +53,8 @@ class ContinuousDataSourceRDD( override def compute(split: Partition, context: TaskContext): Iterator[UnsafeRow] = { val reader = split.asInstanceOf[DataSourceRDDPartition].readTask.createDataReader() - // TODO: capacity option // (null, null) is an allowed input to the queue, representing an epoch boundary. 
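    // For reference, the two limits introduced above are ordinary SQL confs, so they can be tuned
    // per session before a continuous query starts (the values here are only illustrative):
    //   spark.conf.set("spark.sql.streaming.continuous.executorQueueSize", "4096")
    //   spark.conf.set("spark.sql.streaming.continuous.executorPollIntervalMs", "10")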
- val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](1024) + val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](dataQueueSize) val epochEndpoint = EpochCoordinatorRef.get( context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), SparkEnv.get) @@ -88,7 +91,7 @@ class ContinuousDataSourceRDD( } - val epochPollThread = new EpochPollThread(queue, context) + val epochPollThread = new EpochPollThread(queue, context, epochPollIntervalMs) epochPollThread.setDaemon(true) epochPollThread.start() @@ -113,7 +116,8 @@ case class EpochPackedPartitionOffset(epoch: Long) extends PartitionOffset class EpochPollThread( queue: BlockingQueue[(UnsafeRow, PartitionOffset)], - context: TaskContext) + context: TaskContext, + epochPollIntervalMs: Long) extends Thread with Logging { private val epochEndpoint = EpochCoordinatorRef.get( context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), SparkEnv.get) @@ -121,8 +125,7 @@ class EpochPollThread( override def run(): Unit = { try { - // TODO parameterize processing time - ProcessingTimeExecutor(ProcessingTime(100), new SystemClock()) + ProcessingTimeExecutor(ProcessingTime(epochPollIntervalMs), new SystemClock()) .execute { () => val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch()) for (i <- currentEpoch to newEpoch - 1) { From 1da0559de8e94252b0388904c2afaf7414f156cf Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Tue, 12 Dec 2017 13:10:33 -0800 Subject: [PATCH 23/50] latency suite not meaningful here --- .../continuous/ContinuousSuite.scala | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 0239db8ca4143..03f4c108f7f90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -128,29 +128,6 @@ class ContinuousSuite extends StreamTest { StopStream) } - test("rate latency") { - import org.apache.spark.sql.functions.udf - val current_timestamp = udf(() => System.currentTimeMillis()) - val df = spark.readStream - .format("rate") - .option("numPartitions", "2") - .option("rowsPerSecond", "2") - .load() - .select('timestamp.cast("long") as 'ingest, 'value, current_timestamp() as 'processing) - val query = df.writeStream - .format("memory") - .queryName("latency") - .trigger(Trigger.Continuous(100)) - .start() - val continuousExecution = - query.asInstanceOf[StreamingQueryWrapper].streamingQuery.asInstanceOf[ContinuousExecution] - continuousExecution.awaitEpoch(0) - waitForRateSourceTriggers(continuousExecution, 2) - query.stop() - - print(spark.read.table("latency").collect().mkString) - } - test("query without test harness") { val df = spark.readStream .format("rate") From 7c5b4389b305103f39a74478c7ef1ca4e22d6c8e Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Tue, 12 Dec 2017 16:04:07 -0800 Subject: [PATCH 24/50] more stress::q --- .../continuous/ContinuousSuite.scala | 57 ++++++++++++++++--- 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 03f4c108f7f90..efc1b6c9d3692 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -186,8 +186,8 @@ class ContinuousStressSuite extends StreamTest { test("only one epoch") { val df = spark.readStream .format("rate") - .option("numPartitions", "1") - .option("rowsPerSecond", "100") + .option("numPartitions", "5") + .option("rowsPerSecond", "500") .load() .select('value) @@ -196,14 +196,14 @@ class ContinuousStressSuite extends StreamTest { AwaitEpoch(0), Execute(waitForRateSourceTriggers(_, 200)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 20000): _*)) + CheckAnswer(scala.Range(0, 100000): _*)) } test("automatic epoch advancement") { val df = spark.readStream .format("rate") - .option("numPartitions", "1") - .option("rowsPerSecond", "100") + .option("numPartitions", "5") + .option("rowsPerSecond", "500") .load() .select('value) @@ -214,13 +214,54 @@ class ContinuousStressSuite extends StreamTest { IncrementEpoch(), Execute { query => // Because we have automatic advancement, we can't reliably guarantee another trigger won't - // commit more than the 20K rows we expect before we can check. So we simply ensure that: - // * the highest value committed was at least 20000 - 1 + // commit more than the 100K rows we expect before we can check. So we simply ensure that: + // * the highest value committed was at least 100000 - 1 // * all values below the highest are present val data = query.sink.asInstanceOf[MemorySinkV2].allData val max = data.map(_.getLong(0)).max - assert(max >= 19999) + assert(max >= 99999) assert(data.toSet == scala.Range(0, max.toInt + 1).map(Row(_)).toSet) }) } + + test("restarts") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "500") + .load() + .select('value) + + testStream(df, useV2Sink = true)( + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(10), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(20), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(21), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(22), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(25), + StopStream, + StartStream(Trigger.Continuous(2012)), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(50), + Execute { query => + // Because we have automatic advancement, we can't reliably check where precisely the last + // commit happened. And we don't have exactly once processing, meaning values may be + // duplicated. So we just check all values below the highest are present, and as a + // sanity check that we got at least up to the 50th trigger. 
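      // e.g. at 500 rows/second over roughly 50 epochs of about 2 seconds each, well over 25,000
      // values should have been committed, which is what the bound below checks.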
+ val data = query.sink.asInstanceOf[MemorySinkV2].allData + val max = data.map(_.getLong(0)).max + assert(max > 25000) + val setDiff = data.toSet.diff(scala.Range(0, max.toInt + 1).map(Row(_)).toSet) + assert(setDiff.isEmpty, s"sets differed by $setDiff") + }) + } } From bedd7b3178c0fa1b52ce85155a172d0beda1322f Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Wed, 13 Dec 2017 10:55:23 -0800 Subject: [PATCH 25/50] use temp dir --- .../org/apache/spark/sql/streaming/DataStreamReader.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 0a48479993450..f17935e86f459 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.streaming.{StreamingRelation, StreamingRelationV2} import org.apache.spark.sql.sources.v2.{ContinuousReadSupport, DataSourceV2Options, MicroBatchReadSupport} import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Utils /** * Interface used to load a streaming `Dataset` from external storage systems (e.g. file systems, @@ -166,11 +167,9 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo options = extraOptions.toMap) ds match { case s: ContinuousReadSupport => - // TODO: What do we pass as the metadata log path? We just need some scratch space, the - // schema can't depend on it val tempReader = s.createContinuousReader( java.util.Optional.ofNullable(userSpecifiedSchema.orNull), - "scratch/path/for/schema", + Utils.createTempDir(namePrefix = s"temporaryReader").getCanonicalPath, options) // Generate the V1 node to catch errors thrown within generation. StreamingRelation(v1DataSource) From e5bf024dd8b1fb41e3f73439efbb2e918ba46d1c Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Thu, 14 Dec 2017 11:37:36 -0800 Subject: [PATCH 26/50] fix against rebase --- .../continuous/ContinuousRateStreamSource.scala | 4 ---- .../sql/streaming/continuous/ContinuousSuite.scala | 12 ++++-------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala index a66be6634814d..4c3a1ee201ac1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala @@ -66,9 +66,6 @@ class ContinuousRateStreamReader(options: DataSourceV2Options) override def getStartOffset(): Offset = offset - // Exposed so unit tests can reliably ensure they end after a desired row count. 
- private[sql] var lastStartTime: Long = _ - override def createReadTasks(): java.util.List[ReadTask[Row]] = { val partitionStartMap = offset match { case off: RateStreamOffset => off.partitionToValueAndRunTimeMs @@ -140,7 +137,6 @@ class RateStreamDataReader( return false } - currentValue += increment currentRow = Row( DateTimeUtils.toJavaTimestamp(DateTimeUtils.fromMillis(nextReadTime)), currentValue) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index efc1b6c9d3692..cf8621ed56d80 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -65,11 +65,9 @@ class ContinuousSuite extends StreamTest { case DataSourceV2ScanExec(_, r: ContinuousRateStreamReader) => r }.get - assert(reader.lastStartTime != 0, "reader last start time not initialized yet") - val deltaMs = (numTriggers - 1) * 1000 + 300 - while (System.currentTimeMillis < reader.lastStartTime + deltaMs) { - Thread.sleep(reader.lastStartTime + deltaMs - System.currentTimeMillis) + while (System.currentTimeMillis < reader.creationTime + deltaMs) { + Thread.sleep(reader.creationTime + deltaMs - System.currentTimeMillis) } } } @@ -170,11 +168,9 @@ class ContinuousStressSuite extends StreamTest { case DataSourceV2ScanExec(_, r: ContinuousRateStreamReader) => r }.get - assert(reader.lastStartTime != 0, "reader last start time not initialized yet") - val deltaMs = (numTriggers - 1) * 1000 + 300 - while (System.currentTimeMillis < reader.lastStartTime + deltaMs) { - Thread.sleep(reader.lastStartTime + deltaMs - System.currentTimeMillis) + while (System.currentTimeMillis < reader.creationTime + deltaMs) { + Thread.sleep(reader.creationTime + deltaMs - System.currentTimeMillis) } } } From d16410fd7d186dc3de52ef3fe34e148353743f30 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Thu, 14 Dec 2017 13:32:16 -0800 Subject: [PATCH 27/50] fix ser/deser --- .../streaming/MicroBatchExecution.scala | 2 -- .../streaming/RateStreamOffset.scala | 5 +++- .../execution/streaming/StreamExecution.scala | 2 +- .../ContinuousDataSourceRDDIter.scala | 9 ++++-- .../continuous/ContinuousExecution.scala | 14 +++++---- .../ContinuousRateStreamSource.scala | 9 +++--- .../sources/RateStreamSourceV2.scala | 17 ++++++----- .../streaming/RateSourceV2Suite.scala | 20 ++++++------- .../spark/sql/streaming/StreamTest.scala | 2 +- .../continuous/ContinuousSuite.scala | 29 ++++++++++++------- 10 files changed, 63 insertions(+), 46 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 09c6f64ca5a2c..7b1d4a7149e21 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -42,8 +42,6 @@ class MicroBatchExecution( sparkSession, name, checkpointRoot, analyzedPlan, sink, trigger, triggerClock, outputMode, deleteCheckpointOnStop) { - override val offsetLog = new OffsetSeqLog(sparkSession, checkpointFile("offsets")) - override val batchCommitLog = new BatchCommitLog(sparkSession, checkpointFile("commits")) @volatile protected var sources: Seq[BaseStreamingSource] = Seq.empty private val triggerExecutor 
= trigger match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala index 726d8574af52b..65d6d18936167 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala @@ -22,8 +22,11 @@ import org.json4s.jackson.Serialization import org.apache.spark.sql.sources.v2 -case class RateStreamOffset(partitionToValueAndRunTimeMs: Map[Int, (Long, Long)]) +case class RateStreamOffset(partitionToValueAndRunTimeMs: Map[Int, ValueRunTimeMsPair]) extends v2.reader.Offset { implicit val defaultFormats: DefaultFormats = DefaultFormats override val json = Serialization.write(partitionToValueAndRunTimeMs) } + + +case class ValueRunTimeMsPair(value: Long, runTimeMs: Long) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 9162398650484..ccace1f799e0c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -196,7 +196,7 @@ abstract class StreamExecution( * processing is done. Thus, the Nth record in this log indicated data that is currently being * processed and the N-1th entry indicates which offsets have been durably committed to the sink. */ - def offsetLog: OffsetSeqLog + val offsetLog = new OffsetSeqLog(sparkSession, checkpointFile("offsets")) /** * A log that records the batch ids that have completed. This is used to check if a batch was diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index c92980bea3065..592028b559e66 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -153,8 +153,13 @@ class DataReaderThread( try { while (!context.isInterrupted && !context.isCompleted()) { if (!reader.next()) { - throw new IllegalStateException( - "Continuous reader reported no remaining elements! Reader should have blocked waiting.") + // Check again, since reader.next() might have blocked through an incoming interrupt. + if (!context.isInterrupted && !context.isCompleted()) { + throw new IllegalStateException( + "Continuous reader reported no elements! 
Reader should have blocked waiting.") + } else { + return + } } queue.put((reader.get().copy(), baseReader.getOffset)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 9edc19f74dffe..58742e5d265f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -119,7 +119,6 @@ class ContinuousExecution( } committedOffsets = nextOffsets.toStreamProgress(sources) - currentBatchId = latestEpochId + 1 logDebug(s"Resuming at epoch $currentBatchId with committed offsets $committedOffsets") nextOffsets @@ -167,7 +166,9 @@ class ContinuousExecution( s"${Utils.truncatedString(newOutput, ",")}") replacements ++= output.zip(newOutput) - reader.setOffset(java.util.Optional.ofNullable(offsets.offsets(0).orNull)) + val loggedOffset = offsets.offsets(0) + val realOffset = loggedOffset.map(off => reader.deserializeOffset(off.json)) + reader.setOffset(java.util.Optional.ofNullable(realOffset.orNull)) DataSourceV2Relation(newOutput, reader) } @@ -291,9 +292,9 @@ class ContinuousExecution( committedOffsets ++= Seq(continuousSources(0) -> offset) } - if (minBatchesToRetain < currentBatchId) { - offsetLog.purge(currentBatchId - minBatchesToRetain) - batchCommitLog.purge(currentBatchId - minBatchesToRetain) + if (minLogEntriesToMaintain < currentBatchId) { + offsetLog.purge(currentBatchId - minLogEntriesToMaintain) + batchCommitLog.purge(currentBatchId - minLogEntriesToMaintain) } awaitProgressLock.lock() @@ -311,7 +312,8 @@ class ContinuousExecution( def notDone = { val latestCommit = batchCommitLog.getLatest() latestCommit match { - case Some((latestEpoch, _)) => latestEpoch < epoch + case Some((latestEpoch, _)) => + latestEpoch < epoch case None => true } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala index 4c3a1ee201ac1..1ce22c994aa8e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala @@ -47,13 +47,14 @@ class ContinuousRateStreamReader(options: DataSourceV2Options) override def mergeOffsets(offsets: Array[PartitionOffset]): Offset = { assert(offsets.length == numPartitions) val tuples = offsets.map { - case ContinuousRateStreamPartitionOffset(i, currVal, nextRead) => (i, (currVal, nextRead)) + case ContinuousRateStreamPartitionOffset(i, currVal, nextRead) => + (i, ValueRunTimeMsPair(currVal, nextRead)) } RateStreamOffset(Map(tuples: _*)) } override def deserializeOffset(json: String): Offset = { - RateStreamOffset(Serialization.read[Map[Int, (Long, Long)]](json)) + RateStreamOffset(Serialization.read[Map[Int, ValueRunTimeMsPair]](json)) } override def readSchema(): StructType = RateSourceProvider.SCHEMA @@ -85,8 +86,8 @@ class ContinuousRateStreamReader(options: DataSourceV2Options) // Have each partition advance by numPartitions each row, with starting points staggered // by their partition index. 
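The ser/deser fix in this patch swaps the offset map's (Long, Long) tuples for the named ValueRunTimeMsPair case class, which json4s can round-trip by field name. A standalone sketch of that round trip, assuming only json4s-jackson (the version Spark bundles) on the classpath; the object and value names are illustrative:

    import org.json4s.DefaultFormats
    import org.json4s.jackson.Serialization

    // Mirrors RateStreamOffset's json field and ContinuousRateStreamReader.deserializeOffset.
    case class ValueRunTimeMsPair(value: Long, runTimeMs: Long)

    object RateOffsetJsonSketch {
      implicit val defaultFormats: DefaultFormats = DefaultFormats

      def main(args: Array[String]): Unit = {
        val offsets = Map(0 -> ValueRunTimeMsPair(0L, 1000L), 1 -> ValueRunTimeMsPair(1L, 1000L))
        val json = Serialization.write(offsets)
        val restored = Serialization.read[Map[Int, ValueRunTimeMsPair]](json)
        assert(restored == offsets)   // named fields make the round trip unambiguous
      }
    }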
RateStreamReadTask( - start._1, // starting row value - start._2, // starting time in ms + start.value, + start.runTimeMs, i, numPartitions, perPartitionRate) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala index 45dc7d75cbc8d..7687dd146802d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala @@ -71,7 +71,7 @@ class RateStreamV2Reader(options: DataSourceV2Options) val currentTime = clock.getTimeMillis() RateStreamOffset( this.start.partitionToValueAndRunTimeMs.map { - case startOffset @ (part, (currentVal, currentReadTime)) => + case startOffset @ (part, ValueRunTimeMsPair(currentVal, currentReadTime)) => // Calculate the number of rows we should advance in this partition (based on the // current time), and output a corresponding offset. val readInterval = currentTime - currentReadTime @@ -79,9 +79,9 @@ class RateStreamV2Reader(options: DataSourceV2Options) if (numNewRows <= 0) { startOffset } else { - (part, - (currentVal + (numNewRows * numPartitions), - currentReadTime + (numNewRows * msPerPartitionBetweenRows))) + (part, ValueRunTimeMsPair( + currentVal + (numNewRows * numPartitions), + currentReadTime + (numNewRows * msPerPartitionBetweenRows))) } } ) @@ -98,15 +98,15 @@ class RateStreamV2Reader(options: DataSourceV2Options) } override def deserializeOffset(json: String): Offset = { - RateStreamOffset(Serialization.read[Map[Int, (Long, Long)]](json)) + RateStreamOffset(Serialization.read[Map[Int, ValueRunTimeMsPair]](json)) } override def createReadTasks(): java.util.List[ReadTask[Row]] = { val startMap = start.partitionToValueAndRunTimeMs val endMap = end.partitionToValueAndRunTimeMs endMap.keys.toSeq.map { part => - val (endVal, _) = endMap(part) - val (startVal, startTimeMs) = startMap(part) + val ValueRunTimeMsPair(endVal, _) = endMap(part) + val ValueRunTimeMsPair(startVal, startTimeMs) = startMap(part) val packedRows = mutable.ListBuffer[(Long, Long)]() var outVal = startVal + numPartitions @@ -158,7 +158,8 @@ object RateStreamSourceV2 { // by the increment that will later be applied. The first row output in each // partition will have a value equal to the partition index. 
(i, - ((i - numPartitions).toLong, + ValueRunTimeMsPair( + (i - numPartitions).toLong, creationTimeMs)) }.toMap) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala index 6514c5f0fdfeb..dde41713c47a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala @@ -49,8 +49,8 @@ class RateSourceV2Suite extends StreamTest { test("microbatch - set offset") { val reader = new RateStreamV2Reader(DataSourceV2Options.empty()) - val startOffset = RateStreamOffset(Map((0, (0, 1000)))) - val endOffset = RateStreamOffset(Map((0, (0, 2000)))) + val startOffset = RateStreamOffset(Map((0, ValueRunTimeMsPair(0, 1000)))) + val endOffset = RateStreamOffset(Map((0, ValueRunTimeMsPair(0, 2000)))) reader.setOffsetRange(Optional.of(startOffset), Optional.of(endOffset)) assert(reader.getStartOffset() == startOffset) assert(reader.getEndOffset() == endOffset) @@ -63,15 +63,15 @@ class RateSourceV2Suite extends StreamTest { reader.setOffsetRange(Optional.empty(), Optional.empty()) reader.getStartOffset() match { case r: RateStreamOffset => - assert(r.partitionToValueAndRunTimeMs(0)._2 == reader.creationTimeMs) + assert(r.partitionToValueAndRunTimeMs(0).runTimeMs == reader.creationTimeMs) case _ => throw new IllegalStateException("unexpected offset type") } reader.getEndOffset() match { case r: RateStreamOffset => // End offset may be a bit beyond 100 ms/9 rows after creation if the wait lasted // longer than 100ms. It should never be early. - assert(r.partitionToValueAndRunTimeMs(0)._1 >= 9) - assert(r.partitionToValueAndRunTimeMs(0)._2 >= reader.creationTimeMs + 100) + assert(r.partitionToValueAndRunTimeMs(0).value >= 9) + assert(r.partitionToValueAndRunTimeMs(0).runTimeMs >= reader.creationTimeMs + 100) case _ => throw new IllegalStateException("unexpected offset type") } @@ -80,8 +80,8 @@ class RateSourceV2Suite extends StreamTest { test("microbatch - predetermined batch size") { val reader = new RateStreamV2Reader( new DataSourceV2Options(Map("numPartitions" -> "1", "rowsPerSecond" -> "20").asJava)) - val startOffset = RateStreamOffset(Map((0, (0, 1000)))) - val endOffset = RateStreamOffset(Map((0, (20, 2000)))) + val startOffset = RateStreamOffset(Map((0, ValueRunTimeMsPair(0, 1000)))) + val endOffset = RateStreamOffset(Map((0, ValueRunTimeMsPair(20, 2000)))) reader.setOffsetRange(Optional.of(startOffset), Optional.of(endOffset)) val tasks = reader.createReadTasks() assert(tasks.size == 1) @@ -93,8 +93,8 @@ class RateSourceV2Suite extends StreamTest { new DataSourceV2Options(Map("numPartitions" -> "11", "rowsPerSecond" -> "33").asJava)) val startOffset = RateStreamSourceV2.createInitialOffset(11, reader.creationTimeMs) val endOffset = RateStreamOffset(startOffset.partitionToValueAndRunTimeMs.toSeq.map { - case (part, (currentVal, currentReadTime)) => - (part, (currentVal + 33, currentReadTime + 1000)) + case (part, ValueRunTimeMsPair(currentVal, currentReadTime)) => + (part, ValueRunTimeMsPair(currentVal + 33, currentReadTime + 1000)) }.toMap) reader.setOffsetRange(Optional.of(startOffset), Optional.of(endOffset)) @@ -135,7 +135,7 @@ class RateSourceV2Suite extends StreamTest { val startTimeMs = reader.getStartOffset() .asInstanceOf[RateStreamOffset] .partitionToValueAndRunTimeMs(t.partitionIndex) - ._2 + .runTimeMs val r = 
t.createDataReader().asInstanceOf[RateStreamDataReader] for (rowIndex <- 0 to 9) { r.next() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 1c589bbdffb83..e2b1aa3c74825 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -414,7 +414,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be case s: MemorySink => (s.latestBatchData, s.allData) case s: MemorySinkV2 => (s.latestBatchData, s.allData) } - val sparkAnswer = try if (lastOnly) latestBatchData else allData catch { + try if (lastOnly) latestBatchData else allData catch { case e: Exception => failTest("Exception while getting data from sink", e) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index cf8621ed56d80..187633c87fe7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -65,7 +65,7 @@ class ContinuousSuite extends StreamTest { case DataSourceV2ScanExec(_, r: ContinuousRateStreamReader) => r }.get - val deltaMs = (numTriggers - 1) * 1000 + 300 + val deltaMs = numTriggers * 1000 + 300 while (System.currentTimeMillis < reader.creationTime + deltaMs) { Thread.sleep(reader.creationTime + deltaMs - System.currentTimeMillis) } @@ -190,9 +190,15 @@ class ContinuousStressSuite extends StreamTest { testStream(df, useV2Sink = true)( StartStream(longContinuousTrigger), AwaitEpoch(0), - Execute(waitForRateSourceTriggers(_, 200)), + Execute(waitForRateSourceTriggers(_, 201)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 100000): _*)) + Execute { query => + val data = query.sink.asInstanceOf[MemorySinkV2].allData + val vals = data.map(_.getLong(0)).toSet + assert(scala.Range(0, 25000).forall { i => + vals.contains(i) + }) + }) } test("automatic epoch advancement") { @@ -206,7 +212,7 @@ class ContinuousStressSuite extends StreamTest { testStream(df, useV2Sink = true)( StartStream(Trigger.Continuous(2012)), AwaitEpoch(0), - Execute(waitForRateSourceTriggers(_, 200)), + Execute(waitForRateSourceTriggers(_, 201)), IncrementEpoch(), Execute { query => // Because we have automatic advancement, we can't reliably guarantee another trigger won't @@ -214,9 +220,10 @@ class ContinuousStressSuite extends StreamTest { // * the highest value committed was at least 100000 - 1 // * all values below the highest are present val data = query.sink.asInstanceOf[MemorySinkV2].allData - val max = data.map(_.getLong(0)).max - assert(max >= 99999) - assert(data.toSet == scala.Range(0, max.toInt + 1).map(Row(_)).toSet) + val vals = data.map(_.getLong(0)).toSet + assert(scala.Range(0, 25000).forall { i => + vals.contains(i) + }) }) } @@ -254,10 +261,10 @@ class ContinuousStressSuite extends StreamTest { // duplicated. So we just check all values below the highest are present, and as a // sanity check that we got at least up to the 50th trigger. 
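The stress assertions that follow check set containment rather than exact equality: delivery here is at least once, so rows may be duplicated across epochs but must never be lost. A reduced standalone form of that check, with hypothetical sink contents standing in for MemorySinkV2.allData:

    object AtLeastOnceCheckSketch {
      def main(args: Array[String]): Unit = {
        val sinkValues = Seq(0L, 1L, 1L, 2L, 3L, 3L, 4L)   // duplicates are acceptable
        val expected = 0L until 5L
        val vals = sinkValues.toSet
        expected.foreach { i =>
          assert(vals.contains(i), s"$i was missing from result data")
        }
        println("all expected values present; duplicates ignored")
      }
    }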
val data = query.sink.asInstanceOf[MemorySinkV2].allData - val max = data.map(_.getLong(0)).max - assert(max > 25000) - val setDiff = data.toSet.diff(scala.Range(0, max.toInt + 1).map(Row(_)).toSet) - assert(setDiff.isEmpty, s"sets differed by $setDiff") + val vals = data.map(_.getLong(0)).toSet + assert(scala.Range(0, 25000).forall { i => + vals.contains(i) + }) }) } } From 6eaba324842697eb160961f41bc0a32e37895381 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Thu, 14 Dec 2017 14:41:21 -0800 Subject: [PATCH 28/50] fix rebase compile --- .../streaming/continuous/ContinuousExecution.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 58742e5d265f3..2ee900bd988fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -111,7 +111,7 @@ class ContinuousExecution( // Note that this will need a slight modification for exactly once. If ending offsets were // reported but not committed for any epochs, we must replay exactly to those offsets. // For at least once, we can just ignore those reports and risk duplicates. - batchCommitLog.getLatest() match { + commitLog.getLatest() match { case Some((latestEpochId, _)) => val nextOffsets = offsetLog.get(latestEpochId).getOrElse { throw new IllegalStateException( @@ -287,14 +287,14 @@ class ContinuousExecution( assert(continuousSources.length == 1, "only one continuous source supported currently") assert(offsetLog.get(epoch).isDefined, s"offset for epoch $epoch not reported before commit") synchronized { - batchCommitLog.add(epoch) + commitLog.add(epoch) val offset = offsetLog.get(epoch).get.offsets(0).get committedOffsets ++= Seq(continuousSources(0) -> offset) } if (minLogEntriesToMaintain < currentBatchId) { offsetLog.purge(currentBatchId - minLogEntriesToMaintain) - batchCommitLog.purge(currentBatchId - minLogEntriesToMaintain) + commitLog.purge(currentBatchId - minLogEntriesToMaintain) } awaitProgressLock.lock() @@ -310,7 +310,7 @@ class ContinuousExecution( */ private[sql] def awaitEpoch(epoch: Long): Unit = { def notDone = { - val latestCommit = batchCommitLog.getLatest() + val latestCommit = commitLog.getLatest() latestCommit match { case Some((latestEpoch, _)) => latestEpoch < epoch From b48c8b0198d34ebe09bb494b4f6a01b0994803e7 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Thu, 14 Dec 2017 15:35:46 -0800 Subject: [PATCH 29/50] stop using ProcessingTime in executor --- .../ContinuousDataSourceRDDIter.scala | 48 ++++++++----------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 592028b559e66..5c4a823652177 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.streaming.continuous -import java.util.concurrent.{ArrayBlockingQueue, BlockingQueue} +import 
java.util.concurrent.{ArrayBlockingQueue, BlockingQueue, TimeUnit} import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong} import scala.collection.JavaConverters._ @@ -33,7 +33,7 @@ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous._ import org.apache.spark.sql.sources.v2.reader._ import org.apache.spark.sql.streaming.ProcessingTime -import org.apache.spark.util.SystemClock +import org.apache.spark.util.{SystemClock, ThreadUtils} class ContinuousDataSourceRDD( sc: SparkContext, @@ -53,11 +53,12 @@ class ContinuousDataSourceRDD( override def compute(split: Partition, context: TaskContext): Iterator[UnsafeRow] = { val reader = split.asInstanceOf[DataSourceRDDPartition].readTask.createDataReader() + val runId = context.getLocalProperty(ContinuousExecution.RUN_ID_KEY) + // (null, null) is an allowed input to the queue, representing an epoch boundary. val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](dataQueueSize) - val epochEndpoint = EpochCoordinatorRef.get( - context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), SparkEnv.get) + val epochEndpoint = EpochCoordinatorRef.get(runId, SparkEnv.get) val itr = new Iterator[UnsafeRow] { private var currentRow: UnsafeRow = _ private var currentOffset: PartitionOffset = @@ -90,10 +91,13 @@ class ContinuousDataSourceRDD( } } - - val epochPollThread = new EpochPollThread(queue, context, epochPollIntervalMs) - epochPollThread.setDaemon(true) - epochPollThread.start() + val epochPollExecutor = ThreadUtils.newDaemonSingleThreadScheduledExecutor( + s"epoch-poll--${runId}--${context.partitionId()}") + epochPollExecutor.scheduleWithFixedDelay( + new EpochPollRunnable(queue, context), + 0, + epochPollIntervalMs, + TimeUnit.MILLISECONDS) val dataReaderThread = new DataReaderThread(reader, queue, context) dataReaderThread.setDaemon(true) @@ -102,7 +106,7 @@ class ContinuousDataSourceRDD( context.addTaskCompletionListener(_ => { reader.close() dataReaderThread.interrupt() - epochPollThread.interrupt() + epochPollExecutor.shutdown() }) itr } @@ -114,33 +118,21 @@ class ContinuousDataSourceRDD( case class EpochPackedPartitionOffset(epoch: Long) extends PartitionOffset -class EpochPollThread( +class EpochPollRunnable( queue: BlockingQueue[(UnsafeRow, PartitionOffset)], - context: TaskContext, - epochPollIntervalMs: Long) + context: TaskContext) extends Thread with Logging { private val epochEndpoint = EpochCoordinatorRef.get( context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), SparkEnv.get) private var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong override def run(): Unit = { - try { - ProcessingTimeExecutor(ProcessingTime(epochPollIntervalMs), new SystemClock()) - .execute { () => - val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch()) - for (i <- currentEpoch to newEpoch - 1) { - queue.put((null, null)) - logDebug(s"Sent marker to start epoch ${i + 1}") - } - currentEpoch = newEpoch - true - } - } catch { - case (_: InterruptedException | _: SparkException) if context.isInterrupted() => - // Continuous shutdown might interrupt us, or it might clean up the endpoint before - // interrupting us. Unfortunately, a missing endpoint just throws a generic SparkException. - // In either case, as long as the context shows interrupted, we can safely clean shutdown. 
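PATCH 29/50 drops the per-task ProcessingTimeExecutor loop in favour of a runnable scheduled at a fixed delay on a daemon executor, which enqueues one (null, null) marker per epoch the driver has advanced past. The shape of that pattern, reduced to plain java.util.concurrent primitives, with an AtomicLong standing in for the EpochCoordinator RPC; names are illustrative:

    import java.util.concurrent.{ArrayBlockingQueue, Executors, TimeUnit}
    import java.util.concurrent.atomic.AtomicLong

    object EpochPollSketch {
      def main(args: Array[String]): Unit = {
        val queue = new ArrayBlockingQueue[String](1024)
        val driverEpoch = new AtomicLong(0)   // stand-in for askSync[Long](GetCurrentEpoch)
        var localEpoch = 0L

        val poller = Executors.newSingleThreadScheduledExecutor()
        poller.scheduleWithFixedDelay(new Runnable {
          override def run(): Unit = {
            val newEpoch = driverEpoch.get()
            // One boundary marker per epoch we have fallen behind, as in EpochPollRunnable.
            (localEpoch until newEpoch).foreach(i => queue.put(s"marker for epoch ${i + 1}"))
            localEpoch = newEpoch
          }
        }, 0, 100, TimeUnit.MILLISECONDS)

        driverEpoch.incrementAndGet()
        println(queue.take())   // blocks until the poller notices the new epoch
        poller.shutdown()
      }
    }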
+ val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch()) + for (i <- currentEpoch to newEpoch - 1) { + queue.put((null, null)) + logDebug(s"Sent marker to start epoch ${i + 1}") } + currentEpoch = newEpoch } } From 25362901379cb87f51b56ffc0bbb01e2e7d3262b Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Thu, 14 Dec 2017 15:54:52 -0800 Subject: [PATCH 30/50] add tests for supported ops --- .../continuous/ContinuousSuite.scala | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 187633c87fe7c..607ac5791e88f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -99,6 +99,57 @@ class ContinuousSuite extends StreamTest { StopStream) } + test("map") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + .map(r => r.getLong(0) * 2) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + CheckAnswer(scala.Range(0, 20, 2): _*)) + } + + test("flatMap") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + .flatMap(r => Seq(0, r.getLong(0), r.getLong(0) * 2)) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + CheckAnswer(scala.Range(0, 10).flatMap(n => Seq(0, n, n * 2)): _*)) + } + + test("filter") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + .where('value > 5) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + CheckAnswer(scala.Range(6, 10): _*)) + } + test("repeatedly restart") { val df = spark.readStream .format("rate") From 2eb048dae5ca3538de17433abdc9ed3810cef34e Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Thu, 14 Dec 2017 16:14:54 -0800 Subject: [PATCH 31/50] unsupported operation check for unsupported continuous mode ops --- .../analysis/UnsupportedOperationChecker.scala | 12 ++++++++++++ .../sql/streaming/StreamingQueryManager.scala | 1 + .../streaming/continuous/ContinuousSuite.scala | 17 +++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 04502d04d9509..39c2f7c9cab54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -339,6 +339,18 @@ object UnsupportedOperationChecker { } } + def checkForContinuous(plan: LogicalPlan, outputMode: OutputMode): Unit = { + checkForStreaming(plan, outputMode) + + plan.foreachUp { + case (_: Project | _: Filter | _: MapElements | _: MapPartitions | + _: DeserializeToObject | _: SerializeFromObject) => + case node if node.nodeName == 
"StreamingRelationV2" => + case node => + throwError(s"Continuous processing does not support ${node.nodeName} operations.")(node) + } + } + private def throwErrorIf( condition: Boolean, msg: String)(implicit operator: LogicalPlan): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index dda19b8ded08b..e808ffaa96410 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -253,6 +253,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo outputMode, deleteCheckpointOnStop)) case v2Sink: ContinuousWriteSupport => + UnsupportedOperationChecker.checkForContinuous(analyzedPlan, outputMode) new StreamingQueryWrapper(new ContinuousExecution( sparkSession, userSpecifiedName.orNull, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 607ac5791e88f..fa0e328f15e7e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -150,6 +150,23 @@ class ContinuousSuite extends StreamTest { CheckAnswer(scala.Range(6, 10): _*)) } + test("deduplicate") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + .dropDuplicates() + + val except = intercept[AnalysisException] { + testStream(df, useV2Sink = true)(StartStream(longContinuousTrigger)) + } + + assert(except.message.contains( + "Continuous processing does not support Deduplicate operations.")) + } + test("repeatedly restart") { val df = spark.readStream .format("rate") From d175cc983128b330b3a0fb3bd48fd96d83ef111a Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Thu, 14 Dec 2017 17:25:58 -0800 Subject: [PATCH 32/50] current timestamp test --- .../UnsupportedOperationChecker.scala | 25 +++++--- .../continuous/ContinuousSuite.scala | 57 ++++++++----------- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 39c2f7c9cab54..b55043c270644 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, MonotonicallyIncreasingID} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys import org.apache.spark.sql.catalyst.plans._ @@ -342,12 +342,23 @@ object UnsupportedOperationChecker { def checkForContinuous(plan: LogicalPlan, outputMode: OutputMode): Unit = { checkForStreaming(plan, 
outputMode) - plan.foreachUp { - case (_: Project | _: Filter | _: MapElements | _: MapPartitions | - _: DeserializeToObject | _: SerializeFromObject) => - case node if node.nodeName == "StreamingRelationV2" => - case node => - throwError(s"Continuous processing does not support ${node.nodeName} operations.")(node) + plan.foreachUp { implicit subPlan => + subPlan match { + case (_: Project | _: Filter | _: MapElements | _: MapPartitions | + _: DeserializeToObject | _: SerializeFromObject) => + case node if node.nodeName == "StreamingRelationV2" => + case node => + throwError(s"Continuous processing does not support ${node.nodeName} operations.") + } + + subPlan.expressions.foreach { e => + if (e.collectLeaves().exists { + case (_: CurrentTimestamp | _: CurrentDate) => true + case _ => false + }) { + throwError(s"Continuous processing does not support current time operations.") + } + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index fa0e328f15e7e..7139f3b15f223 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -47,9 +47,7 @@ import org.apache.spark.sql.test.TestSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils -class ContinuousSuite extends StreamTest { - import testImplicits._ - +class ContinuousSuiteBase extends StreamTest { // We need more than the default local[2] to be able to schedule all partitions simultaneously. override protected def createSparkSession = new TestSparkSession( new SparkContext( @@ -57,7 +55,7 @@ class ContinuousSuite extends StreamTest { "continuous-stream-test-sql-context", sparkConf.set("spark.sql.testkey", "true"))) - private def waitForRateSourceTriggers(query: StreamExecution, numTriggers: Int): Unit = { + protected def waitForRateSourceTriggers(query: StreamExecution, numTriggers: Int): Unit = { query match { case s: ContinuousExecution => assert(numTriggers >= 2, "must wait for at least 2 triggers to ensure query is initialized") @@ -74,7 +72,11 @@ class ContinuousSuite extends StreamTest { // A continuous trigger that will only fire the initial time for the duration of a test. // This allows clean testing with manual epoch advancement. 
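The rewritten checkForContinuous above walks the plan bottom-up, accepts only map-like operators, and separately rejects expression trees containing current-time leaves. A toy traversal over a hand-rolled node type (no Catalyst), only to make the control flow visible; the node classes are illustrative:

    object ContinuousCheckSketch {
      sealed trait Node { def name: String; def children: Seq[Node] }
      case class Project(children: Seq[Node] = Nil) extends Node { val name = "Project" }
      case class Filter(children: Seq[Node] = Nil) extends Node { val name = "Filter" }
      case class Deduplicate(children: Seq[Node] = Nil) extends Node { val name = "Deduplicate" }

      // Children first, then the node itself, like foreachUp.
      def checkForContinuous(plan: Node): Unit = {
        plan.children.foreach(checkForContinuous)
        plan match {
          case _: Project | _: Filter =>   // supported map-like operators
          case other =>
            throw new IllegalArgumentException(
              s"Continuous processing does not support ${other.name} operations.")
        }
      }

      def main(args: Array[String]): Unit = {
        checkForContinuous(Project(Seq(Filter())))   // passes silently
        try checkForContinuous(Project(Seq(Deduplicate()))) catch {
          case e: IllegalArgumentException => println(e.getMessage)
        }
      }
    }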
- private val longContinuousTrigger = Trigger.Continuous("1 hour") + protected val longContinuousTrigger = Trigger.Continuous("1 hour") +} + +class ContinuousSuite extends ContinuousSuiteBase { + import testImplicits._ test("basic rate source") { val df = spark.readStream @@ -167,6 +169,22 @@ class ContinuousSuite extends StreamTest { "Continuous processing does not support Deduplicate operations.")) } + test("timestamp") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select(current_timestamp()) + + val except = intercept[AnalysisException] { + testStream(df, useV2Sink = true)(StartStream(longContinuousTrigger)) + } + + assert(except.message.contains( + "Continuous processing does not support current time operations.")) + } + test("repeatedly restart") { val df = spark.readStream .format("rate") @@ -217,36 +235,9 @@ class ContinuousSuite extends StreamTest { } } -class ContinuousStressSuite extends StreamTest { - +class ContinuousStressSuite extends ContinuousSuiteBase { import testImplicits._ - // We need more than the default local[2] to be able to schedule all partitions simultaneously. - override protected def createSparkSession = new TestSparkSession( - new SparkContext( - "local[10]", - "continuous-stream-test-sql-context", - sparkConf.set("spark.sql.testkey", "true"))) - - private def waitForRateSourceTriggers(query: StreamExecution, numTriggers: Int): Unit = { - query match { - case s: ContinuousExecution => - assert(numTriggers >= 2, "must wait for at least 2 triggers to ensure query is initialized") - val reader = s.lastExecution.executedPlan.collectFirst { - case DataSourceV2ScanExec(_, r: ContinuousRateStreamReader) => r - }.get - - val deltaMs = (numTriggers - 1) * 1000 + 300 - while (System.currentTimeMillis < reader.creationTime + deltaMs) { - Thread.sleep(reader.creationTime + deltaMs - System.currentTimeMillis) - } - } - } - - // A continuous trigger that will only fire the initial time for the duration of a test. - // This allows clean testing with manual epoch advancement. 
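For orientation, the user-facing shape these suites exercise is a map-only pipeline on the rate source driven by a continuous trigger. A hypothetical end-to-end sketch, assuming the v2 memory sink accepts continuous queries as it does in the tests; none of this is copied from a specific test:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.streaming.Trigger

    object ContinuousQuerySketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[4]")
          .appName("continuous-sketch")
          .getOrCreate()

        val query = spark.readStream
          .format("rate")
          .option("numPartitions", "2")
          .option("rowsPerSecond", "5")
          .load()
          .selectExpr("value * 2 AS doubled")        // map-only work, which continuous mode supports
          .writeStream
          .format("memory")
          .queryName("continuous_sketch")
          .trigger(Trigger.Continuous("1 second"))   // commit offsets roughly once per second
          .start()

        query.awaitTermination(10000)                // let the sketch run for ~10 seconds
        spark.table("continuous_sketch").show()
        query.stop()
        spark.stop()
      }
    }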
- private val longContinuousTrigger = Trigger.Continuous("1 hour") - test("only one epoch") { val df = spark.readStream .format("rate") From 2abfd0e285874079abe3e1d1a377aca85e72f404 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Fri, 15 Dec 2017 11:17:21 -0800 Subject: [PATCH 33/50] address trigger comments --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- .../main/java/org/apache/spark/sql/streaming/Trigger.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 60462cedd8c77..84fe4bb711a4e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1057,7 +1057,7 @@ object SQLConf { .internal() .doc("The interval at which continuous execution readers will poll to check whether" + " the epoch has advanced on the driver.") - .intConf + .timeConf(TimeUnit.MILLISECONDS) .createWithDefault(100) object Deprecated { @@ -1375,7 +1375,7 @@ class SQLConf extends Serializable with Logging { def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE) - def continuousStreamingExecutorPollIntervalMs: Int = + def continuousStreamingExecutorPollIntervalMs: Long = getConf(CONTINUOUS_STREAMING_EXECUTOR_POLL_INTERVAL_MS) /** ********************** SQLConf functionality methods ************ */ diff --git a/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java b/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java index cf4f0c6899ffc..9e3ad5033054a 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java +++ b/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java @@ -113,7 +113,7 @@ public static Trigger Continuous(long intervalMs) { * * {{{ * import java.util.concurrent.TimeUnit - * df.writeStream.trigger(ProcessingTime.create(10, TimeUnit.SECONDS)) + * df.writeStream.trigger(Trigger.Continuous(10, TimeUnit.SECONDS)) * }}} * * @since 2.3.0 @@ -131,7 +131,7 @@ public static Trigger Continuous(long interval, TimeUnit timeUnit) { * import scala.concurrent.duration._ * df.writeStream.trigger(Trigger.Continuous(10.seconds)) * }}} - * @since 2.2.0 + * @since 2.3.0 */ public static Trigger Continuous(Duration interval) { return ContinuousTrigger.apply(interval); @@ -144,7 +144,7 @@ public static Trigger Continuous(Duration interval) { * {{{ * df.writeStream.trigger(Trigger.Continuous("10 seconds")) * }}} - * @since 2.2.0 + * @since 2.3.0 */ public static Trigger Continuous(String interval) { return ContinuousTrigger.apply(interval); From 2d3fb96dfdf46e95cb358f331eae5ac1380ab088 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Fri, 15 Dec 2017 11:48:36 -0800 Subject: [PATCH 34/50] update mima excludes for private[sql] method change --- project/MimaExcludes.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 9902fedb65d59..81584af6813ea 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,6 +36,11 @@ object MimaExcludes { // Exclude rules for 2.3.x lazy val v23excludes = v22excludes ++ Seq( + // SPARK-22789: Map-only continuous processing execution + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.startQuery$default$8"), + 
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.startQuery$default$6"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.startQuery$default$9"), + // SPARK-22372: Make cluster submission use SparkApplication. ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.getSecretKeyFromUserCredentials"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.isYarnMode"), From 24fed794508949d5efc36477b14111f02297b24b Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Fri, 15 Dec 2017 11:58:27 -0800 Subject: [PATCH 35/50] remove check microbatch is in registry --- .../sql/execution/streaming/RateSourceV2Suite.scala | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala index dde41713c47a8..dc833b2ccaa22 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala @@ -29,16 +29,6 @@ import org.apache.spark.sql.sources.v2.{ContinuousReadSupport, DataSourceV2Optio import org.apache.spark.sql.streaming.StreamTest class RateSourceV2Suite extends StreamTest { - test("microbatch in registry") { - DataSource.lookupDataSource("rate", spark.sqlContext.conf).newInstance() match { - case ds: MicroBatchReadSupport => - val reader = ds.createMicroBatchReader(Optional.empty(), "", DataSourceV2Options.empty()) - assert(reader.isInstanceOf[RateStreamV2Reader]) - case _ => - throw new IllegalStateException("Could not find v2 read support for rate") - } - } - test("microbatch - numPartitions propagated") { val reader = new RateStreamV2Reader( new DataSourceV2Options(Map("numPartitions" -> "11", "rowsPerSecond" -> "33").asJava)) From c65120639fbd98fac7fe4bede93a178650bcc852 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Fri, 15 Dec 2017 15:15:25 -0800 Subject: [PATCH 36/50] fix computeStats test --- .../spark/sql/streaming/StreamSuite.scala | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 0db2022dc8d16..c65e5d3dd75c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -77,10 +77,23 @@ class StreamSuite extends StreamTest { } test("StreamingRelation.computeStats") { - val streamingRelation = spark.readStream.format("memory").load().logicalPlan collect { - case s: StreamingRelation => s + withTempDir { dir => + val df = spark.readStream.format("csv").schema(StructType(Seq())).load(dir.getCanonicalPath) + val streamingRelation = df.logicalPlan collect { + case s: StreamingRelation => s + } + assert(streamingRelation.nonEmpty, "cannot find StreamingRelation") + assert( + streamingRelation.head.computeStats.sizeInBytes == + spark.sessionState.conf.defaultSizeInBytes) + } + } + + test("StreamingRelationV2.computeStats") { + val streamingRelation = spark.readStream.format("rate").load().logicalPlan collect { + case s: StreamingRelationV2 => s } - assert(streamingRelation.nonEmpty, "cannot find StreamingRelation") + 
assert(streamingRelation.nonEmpty, "cannot find StreamingExecutionRelation") assert( streamingRelation.head.computeStats.sizeInBytes == spark.sessionState.conf.defaultSizeInBytes) } From 6c5870b78d9773c9bc6f91d9e013f9955a697467 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 18 Dec 2017 16:13:57 -0800 Subject: [PATCH 37/50] fail task if subthreads fail --- .../ContinuousDataSourceRDDIter.scala | 78 ++++++++++++------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 5c4a823652177..934222f45ec4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -58,8 +58,26 @@ class ContinuousDataSourceRDD( // (null, null) is an allowed input to the queue, representing an epoch boundary. val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](dataQueueSize) + val epochPollFailed = new AtomicBoolean(false) + val epochPollExecutor = ThreadUtils.newDaemonSingleThreadScheduledExecutor( + s"epoch-poll--${runId}--${context.partitionId()}") + val epochPollRunnable = new EpochPollRunnable(queue, context, epochPollFailed) + epochPollExecutor.scheduleWithFixedDelay( + epochPollRunnable, 0, epochPollIntervalMs, TimeUnit.MILLISECONDS) + + val dataReaderFailed = new AtomicBoolean(false) + val dataReaderThread = new DataReaderThread(reader, queue, context, dataReaderFailed) + dataReaderThread.setDaemon(true) + dataReaderThread.start() + + context.addTaskCompletionListener(_ => { + reader.close() + dataReaderThread.interrupt() + epochPollExecutor.shutdown() + }) + val epochEndpoint = EpochCoordinatorRef.get(runId, SparkEnv.get) - val itr = new Iterator[UnsafeRow] { + new Iterator[UnsafeRow] { private var currentRow: UnsafeRow = _ private var currentOffset: PartitionOffset = ContinuousDataSourceRDD.getBaseReader(reader).getOffset @@ -67,6 +85,13 @@ class ContinuousDataSourceRDD( context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong override def hasNext(): Boolean = { + if (dataReaderFailed.get()) { + throw new SparkException("data read failed", dataReaderThread.failureReason) + } + if (epochPollFailed.get()) { + throw new SparkException("epoch poll failed", epochPollRunnable.failureReason) + } + queue.take() match { // epoch boundary marker case (null, null) => @@ -90,25 +115,6 @@ class ContinuousDataSourceRDD( r } } - - val epochPollExecutor = ThreadUtils.newDaemonSingleThreadScheduledExecutor( - s"epoch-poll--${runId}--${context.partitionId()}") - epochPollExecutor.scheduleWithFixedDelay( - new EpochPollRunnable(queue, context), - 0, - epochPollIntervalMs, - TimeUnit.MILLISECONDS) - - val dataReaderThread = new DataReaderThread(reader, queue, context) - dataReaderThread.setDaemon(true) - dataReaderThread.start() - - context.addTaskCompletionListener(_ => { - reader.close() - dataReaderThread.interrupt() - epochPollExecutor.shutdown() - }) - itr } override def getPreferredLocations(split: Partition): Seq[String] = { @@ -120,26 +126,39 @@ case class EpochPackedPartitionOffset(epoch: Long) extends PartitionOffset class EpochPollRunnable( queue: BlockingQueue[(UnsafeRow, PartitionOffset)], - context: TaskContext) + context: TaskContext, + failedFlag: AtomicBoolean) 
extends Thread with Logging { + private[continuous] var failureReason: Throwable = _ + private val epochEndpoint = EpochCoordinatorRef.get( context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), SparkEnv.get) private var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong override def run(): Unit = { - val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch()) - for (i <- currentEpoch to newEpoch - 1) { - queue.put((null, null)) - logDebug(s"Sent marker to start epoch ${i + 1}") + try { + val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch()) + for (i <- currentEpoch to newEpoch - 1) { + queue.put((null, null)) + logDebug(s"Sent marker to start epoch ${i + 1}") + } + currentEpoch = newEpoch + } catch { + case t: Throwable => + failedFlag.set(true) + failureReason = t + throw t } - currentEpoch = newEpoch } } class DataReaderThread( reader: DataReader[UnsafeRow], queue: BlockingQueue[(UnsafeRow, PartitionOffset)], - context: TaskContext) extends Thread { + context: TaskContext, + failedFlag: AtomicBoolean) extends Thread { + private[continuous] var failureReason: Throwable = _ + override def run(): Unit = { val baseReader = ContinuousDataSourceRDD.getBaseReader(reader) try { @@ -160,6 +179,11 @@ class DataReaderThread( case _: InterruptedException if context.isInterrupted() => // Continuous shutdown always involves an interrupt; shut down quietly. return + + case t: Throwable => + failedFlag.set(true) + failureReason = t + throw t } } } From 12f29554e90c730a44f2958b02c58732ae370a87 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 18 Dec 2017 19:57:30 -0800 Subject: [PATCH 38/50] fix race conditions --- .../sql/execution/streaming/HDFSMetadataLog.scala | 15 +++++++++++++++ .../continuous/ContinuousDataSourceRDDIter.scala | 10 +++++++--- .../continuous/ContinuousExecution.scala | 5 +++++ .../streaming/continuous/ContinuousSuite.scala | 10 +++++----- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index 43cf0ef1da8ca..d9eb31a1f2317 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -266,6 +266,21 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: } } + /** + * Removes all log entries later than thresholdBatchId (exclusive). 
+ */ + def purgeAfter(thresholdBatchId: Long): Unit = { + val batchIds = fileManager.list(metadataPath, batchFilesFilter) + .map(f => pathToBatchId(f.getPath)) + + for (batchId <- batchIds if batchId > thresholdBatchId) { + print(s"AAAAA purging\n") + val path = batchIdToPath(batchId) + fileManager.delete(path) + logTrace(s"Removed metadata log file: $path") + } + } + private def createFileManager(): FileManager = { val hadoopConf = sparkSession.sessionState.newHadoopConf() try { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 934222f45ec4e..8d23dec05a0f9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -55,7 +55,9 @@ class ContinuousDataSourceRDD( val runId = context.getLocalProperty(ContinuousExecution.RUN_ID_KEY) - // (null, null) is an allowed input to the queue, representing an epoch boundary. + // This queue contains two types of messages: + // * (null, null) representing an epoch boundary. + // * (row, off) containing a data row and its corresponding PartitionOffset. val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](dataQueueSize) val epochPollFailed = new AtomicBoolean(false) @@ -65,6 +67,9 @@ class ContinuousDataSourceRDD( epochPollExecutor.scheduleWithFixedDelay( epochPollRunnable, 0, epochPollIntervalMs, TimeUnit.MILLISECONDS) + // Important sequencing - we must get start offset before the data reader thread begins + val startOffset = ContinuousDataSourceRDD.getBaseReader(reader).getOffset + val dataReaderFailed = new AtomicBoolean(false) val dataReaderThread = new DataReaderThread(reader, queue, context, dataReaderFailed) dataReaderThread.setDaemon(true) @@ -79,8 +84,7 @@ class ContinuousDataSourceRDD( val epochEndpoint = EpochCoordinatorRef.get(runId, SparkEnv.get) new Iterator[UnsafeRow] { private var currentRow: UnsafeRow = _ - private var currentOffset: PartitionOffset = - ContinuousDataSourceRDD.getBaseReader(reader).getOffset + private var currentOffset: PartitionOffset = startOffset private var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 2ee900bd988fe..77c3279dc7d0b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -119,6 +119,11 @@ class ContinuousExecution( } committedOffsets = nextOffsets.toStreamProgress(sources) + // Forcibly align commit and offset logs by slicing off any spurious offset logs from + // a previous run. We can't allow commits to an epoch that a previous run reached but + // this run has not. 
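The alignment rule described in the comment above, sketched on a toy in-memory pair of logs: any offsets recorded past the last committed epoch are dropped on restart, so the new run can never commit an epoch it did not itself plan. Epoch ids and payloads are hypothetical:

    object LogAlignmentSketch {
      def main(args: Array[String]): Unit = {
        // Previous run wrote offsets for epochs 0..2 but only committed 0..1 before dying.
        var offsetLog = Map(0L -> "offsets-0", 1L -> "offsets-1", 2L -> "offsets-2")
        val commitLog = Map(0L -> "committed", 1L -> "committed")

        val latestCommittedEpoch = commitLog.keys.max
        // purgeAfter(latestCommittedEpoch): discard entries strictly later than it.
        offsetLog = offsetLog.filter { case (epoch, _) => epoch <= latestCommittedEpoch }

        assert(offsetLog.keySet == commitLog.keySet)
        println(s"resuming at epoch ${latestCommittedEpoch + 1}")
      }
    }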
+ offsetLog.purgeAfter(latestEpochId) + currentBatchId = latestEpochId + 1 logDebug(s"Resuming at epoch $currentBatchId with committed offsets $committedOffsets") nextOffsets diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 7139f3b15f223..b53a7276637e1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -316,14 +316,14 @@ class ContinuousStressSuite extends ContinuousSuiteBase { AwaitEpoch(50), Execute { query => // Because we have automatic advancement, we can't reliably check where precisely the last - // commit happened. And we don't have exactly once processing, meaning values may be + // commit happened. And we don't have exactly once prøocessing, meaning values may be // duplicated. So we just check all values below the highest are present, and as a // sanity check that we got at least up to the 50th trigger. val data = query.sink.asInstanceOf[MemorySinkV2].allData - val vals = data.map(_.getLong(0)).toSet - assert(scala.Range(0, 25000).forall { i => - vals.contains(i) - }) + val vals = data.map(_.getLong(0)).sorted + scala.Range(0, 25000).foreach { i => + assert(vals.contains(i), s"$i was missing from result data") + } }) } } From c00b0995d43c7c730f3c4ec49a2a54fa5a61ac11 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Mon, 18 Dec 2017 20:21:30 -0800 Subject: [PATCH 39/50] make sure each op can advance epoch in test --- .../sql/streaming/continuous/ContinuousSuite.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index b53a7276637e1..c59943888d0ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -115,7 +115,9 @@ class ContinuousSuite extends ContinuousSuiteBase { AwaitEpoch(0), Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 20, 2): _*)) + Execute(waitForRateSourceTriggers(_, 4)), + IncrementEpoch(), + CheckAnswer(scala.Range(0, 40, 2): _*)) } test("flatMap") { @@ -132,7 +134,9 @@ class ContinuousSuite extends ContinuousSuiteBase { AwaitEpoch(0), Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 10).flatMap(n => Seq(0, n, n * 2)): _*)) + Execute(waitForRateSourceTriggers(_, 4)), + IncrementEpoch(), + CheckAnswer(scala.Range(0, 20).flatMap(n => Seq(0, n, n * 2)): _*)) } test("filter") { @@ -149,7 +153,9 @@ class ContinuousSuite extends ContinuousSuiteBase { AwaitEpoch(0), Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(6, 10): _*)) + Execute(waitForRateSourceTriggers(_, 4)), + IncrementEpoch(), + CheckAnswer(scala.Range(6, 20): _*)) } test("deduplicate") { From 3330ae492dcf08a8c07178c3ba8125cf489137bb Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Wed, 20 Dec 2017 14:20:32 -0800 Subject: [PATCH 40/50] handle data reader failure when queue is empty --- .../execution/streaming/HDFSMetadataLog.scala | 1 - .../execution/streaming/StreamExecution.scala | 2 +- .../ContinuousDataSourceRDDIter.scala | 21 +++++++++++++------ 3 
files changed, 16 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index d9eb31a1f2317..6e8154d58d4c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -274,7 +274,6 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: .map(f => pathToBatchId(f.getPath)) for (batchId <- batchIds if batchId > thresholdBatchId) { - print(s"AAAAA purging\n") val path = batchIdToPath(batchId) fileManager.delete(path) logTrace(s"Removed metadata log file: $path") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index ccace1f799e0c..19c8f29c23367 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -298,7 +298,7 @@ abstract class StreamExecution( e, committedOffsets.toOffsetSeq(sources, offsetSeqMetadata).toString, availableOffsets.toOffsetSeq(sources, offsetSeqMetadata).toString) - // logError(s"Query $prettyIdString terminated with error", e) + logError(s"Query $prettyIdString terminated with error", e) updateStatusMessage(s"Terminated with exception: ${e.getMessage}") // Rethrow the fatal errors to allow the user using `Thread.UncaughtExceptionHandler` to // handle them diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 8d23dec05a0f9..f6f9f90260783 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -83,20 +83,29 @@ class ContinuousDataSourceRDD( val epochEndpoint = EpochCoordinatorRef.get(runId, SparkEnv.get) new Iterator[UnsafeRow] { + private val POLL_TIMEOUT_MS = 1000 + private var currentRow: UnsafeRow = _ private var currentOffset: PartitionOffset = startOffset private var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong override def hasNext(): Boolean = { - if (dataReaderFailed.get()) { - throw new SparkException("data read failed", dataReaderThread.failureReason) - } - if (epochPollFailed.get()) { - throw new SparkException("epoch poll failed", epochPollRunnable.failureReason) + var entry: (UnsafeRow, PartitionOffset) = null + while (entry == null) { + if (context.isInterrupted() || context.isCompleted()) { + entry = (null, null) + } + if (dataReaderFailed.get()) { + throw new SparkException("data read failed", dataReaderThread.failureReason) + } + if (epochPollFailed.get()) { + throw new SparkException("epoch poll failed", epochPollRunnable.failureReason) + } + entry = queue.poll(POLL_TIMEOUT_MS, TimeUnit.MILLISECONDS) } - queue.take() match { + entry match { // epoch boundary marker case (null, null) => epochEndpoint.send(ReportPartitionOffset( From 2f902f2e9e13f89a7c47fea0645330082ccee60a Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Wed, 20 Dec 2017 14:21:26 -0800 
Subject: [PATCH 41/50] set failure reason before flag --- .../streaming/continuous/ContinuousDataSourceRDDIter.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index f6f9f90260783..2f74a0c8d9288 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -158,8 +158,8 @@ class EpochPollRunnable( currentEpoch = newEpoch } catch { case t: Throwable => - failedFlag.set(true) failureReason = t + failedFlag.set(true) throw t } } @@ -194,8 +194,8 @@ class DataReaderThread( return case t: Throwable => - failedFlag.set(true) failureReason = t + failedFlag.set(true) throw t } } From 047d48b8d1ef42d72d53758c62f5afa9f4fba49d Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Wed, 20 Dec 2017 14:24:56 -0800 Subject: [PATCH 42/50] don't throw in data reader thread --- .../streaming/continuous/ContinuousDataSourceRDDIter.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 2f74a0c8d9288..4457e01c3566f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -190,13 +190,13 @@ class DataReaderThread( } } catch { case _: InterruptedException if context.isInterrupted() => - // Continuous shutdown always involves an interrupt; shut down quietly. - return + // Continuous shutdown always involves an interrupt; do nothing and shut down quietly. case t: Throwable => failureReason = t failedFlag.set(true) - throw t + // Don't rethrow the exception in this thread. It's not needed, and the default Spark + // exception handler will kill the executor. 
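PATCH 41/50 and 42/50 together pin down a small ordering contract: the background thread records failureReason before it flips the AtomicBoolean, so any consumer that observes the flag also observes a non-null cause, and the thread itself never rethrows. A standalone sketch of that handshake with hypothetical names:

    import java.util.concurrent.atomic.AtomicBoolean

    object FailureFlagSketch {
      @volatile private var failureReason: Throwable = _
      private val failed = new AtomicBoolean(false)

      def main(args: Array[String]): Unit = {
        val worker = new Thread(new Runnable {
          override def run(): Unit = {
            try {
              throw new RuntimeException("simulated read failure")
            } catch {
              case t: Throwable =>
                failureReason = t   // record the cause first...
                failed.set(true)    // ...then publish the flag, so flag readers see the cause
                // no rethrow here; in the real reader thread that would reach the default
                // Spark handler and kill the executor
            }
          }
        })
        worker.start()
        worker.join()

        if (failed.get()) {
          // The consuming iterator wraps and rethrows on its own thread instead.
          println(s"worker failed: ${failureReason.getMessage}")
        }
      }
    }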
} } } From a04978e53d41f0969978aa09dd670926d6d4cae9 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Wed, 20 Dec 2017 14:28:48 -0800 Subject: [PATCH 43/50] set thread name --- .../streaming/continuous/ContinuousDataSourceRDDIter.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 4457e01c3566f..491eb29a25d26 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -169,7 +169,10 @@ class DataReaderThread( reader: DataReader[UnsafeRow], queue: BlockingQueue[(UnsafeRow, PartitionOffset)], context: TaskContext, - failedFlag: AtomicBoolean) extends Thread { + failedFlag: AtomicBoolean) + extends Thread( + s"continuous-reader--${context.partitionId()}--" + + s"${context.getLocalProperty(ContinuousExecution.RUN_ID_KEY)}") { private[continuous] var failureReason: Throwable = _ override def run(): Unit = { From b672370b52379583f66483c48d952e3a1579f4c9 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Wed, 20 Dec 2017 14:46:19 -0800 Subject: [PATCH 44/50] only check InterruptedException --- .../execution/streaming/continuous/ContinuousExecution.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 77c3279dc7d0b..2585209a1d986 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -81,7 +81,7 @@ class ContinuousExecution( try { runContinuous(sparkSessionForStream) } catch { - case _: Throwable if state.get().equals(RECONFIGURING) => + case _: InterruptedException if state.get().equals(RECONFIGURING) => // swallow exception and run again state.set(ACTIVE) } From e4d6e9da667107a04b6df7142ffdec36f3b1ed5c Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Wed, 20 Dec 2017 14:48:18 -0800 Subject: [PATCH 45/50] move import --- .../execution/streaming/continuous/ContinuousExecution.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 2585209a1d986..e6b088cf9c437 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.streaming.continuous import java.util.concurrent.TimeUnit +import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, Map => MutableMap} import org.apache.spark.SparkEnv @@ -140,7 +141,6 @@ class ContinuousExecution( * @param sparkSessionForQuery Isolated [[SparkSession]] to run the continuous query with. 
*/ private def runContinuous(sparkSessionForQuery: SparkSession): Unit = { - import scala.collection.JavaConverters._ // A list of attributes that will need to be updated. val replacements = new ArrayBuffer[(Attribute, Attribute)] // Translate from continuous relation to the underlying data source. From 35a72c7d2d88b72ef2132b717a50d155b95d5e07 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Wed, 20 Dec 2017 14:51:04 -0800 Subject: [PATCH 46/50] case object instead of case class --- .../sql/execution/streaming/continuous/EpochCoordinator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index 3afe6bb417415..903b77eb6dfed 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -36,7 +36,7 @@ private[continuous] sealed trait EpochCoordinatorMessage extends Serializable /** * Atomically increment the current epoch and get the new value. */ -private[sql] case class IncrementAndGetEpoch() extends EpochCoordinatorMessage +private[sql] case object IncrementAndGetEpoch extends EpochCoordinatorMessage // Init messages /** @@ -50,7 +50,7 @@ case class SetWriterPartitions(numPartitions: Int) extends EpochCoordinatorMessa /** * Get the current epoch. */ -private[sql] case class GetCurrentEpoch() extends EpochCoordinatorMessage +private[sql] case object GetCurrentEpoch extends EpochCoordinatorMessage /** * Commit a partition at the specified epoch with the given message. */ From 004f86532ee0549d4e30e93b9f34b9e23d77137c Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Wed, 20 Dec 2017 15:01:41 -0800 Subject: [PATCH 47/50] fulfil contract for iterator --- .../ContinuousDataSourceRDDIter.scala | 22 +++++++++---------- .../continuous/ContinuousExecution.scala | 2 +- .../continuous/EpochCoordinator.scala | 4 ++-- .../spark/sql/streaming/StreamTest.scala | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala index 491eb29a25d26..89fb2ace20917 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -85,16 +85,15 @@ class ContinuousDataSourceRDD( new Iterator[UnsafeRow] { private val POLL_TIMEOUT_MS = 1000 - private var currentRow: UnsafeRow = _ + private var currentEntry: (UnsafeRow, PartitionOffset) = _ private var currentOffset: PartitionOffset = startOffset private var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong override def hasNext(): Boolean = { - var entry: (UnsafeRow, PartitionOffset) = null - while (entry == null) { + while (currentEntry == null) { if (context.isInterrupted() || context.isCompleted()) { - entry = (null, null) + currentEntry = (null, null) } if (dataReaderFailed.get()) { throw new SparkException("data read failed", dataReaderThread.failureReason) @@ -102,10 +101,10 @@ class ContinuousDataSourceRDD( if (epochPollFailed.get()) { throw new 
SparkException("epoch poll failed", epochPollRunnable.failureReason) } - entry = queue.poll(POLL_TIMEOUT_MS, TimeUnit.MILLISECONDS) + currentEntry = queue.poll(POLL_TIMEOUT_MS, TimeUnit.MILLISECONDS) } - entry match { + currentEntry match { // epoch boundary marker case (null, null) => epochEndpoint.send(ReportPartitionOffset( @@ -113,18 +112,19 @@ class ContinuousDataSourceRDD( currentEpoch, currentOffset)) currentEpoch += 1 + currentEntry = null false // real row - case (row, offset) => - currentRow = row + case (_, offset) => currentOffset = offset true } } override def next(): UnsafeRow = { - val r = currentRow - currentRow = null + if (currentEntry == null) throw new NoSuchElementException("No current row was set") + val r = currentEntry._1 + currentEntry = null r } } @@ -150,7 +150,7 @@ class EpochPollRunnable( override def run(): Unit = { try { - val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch()) + val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch) for (i <- currentEpoch to newEpoch - 1) { queue.put((null, null)) logDebug(s"Sent marker to start epoch ${i + 1}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index e6b088cf9c437..584d211a28dc4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -236,7 +236,7 @@ class ContinuousExecution( } false } else if (isActive) { - currentBatchId = epochEndpoint.askSync[Long](IncrementAndGetEpoch()) + currentBatchId = epochEndpoint.askSync[Long](IncrementAndGetEpoch) logInfo(s"New epoch $currentBatchId is starting.") true } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index 903b77eb6dfed..80db3692a09b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -175,12 +175,12 @@ private[continuous] class EpochCoordinator( } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { - case GetCurrentEpoch() => + case GetCurrentEpoch => val result = currentDriverEpoch logDebug(s"Epoch $result") context.reply(result) - case IncrementAndGetEpoch() => + case IncrementAndGetEpoch => currentDriverEpoch += 1 context.reply(currentDriverEpoch) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index e2b1aa3c74825..8ac92dfe1951c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -252,7 +252,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be Execute { case s: ContinuousExecution => val newEpoch = EpochCoordinatorRef.get(s.runId.toString, SparkEnv.get) - .askSync[Long](IncrementAndGetEpoch()) + .askSync[Long](IncrementAndGetEpoch) s.awaitEpoch(newEpoch - 1) case _ => throw new IllegalStateException("microbatch cannot increment epoch") } From 825d437fe1e897c2047171ce78c6bb92805dc5be Mon Sep 
17 00:00:00 2001 From: Jose Torres Date: Wed, 20 Dec 2017 16:07:05 -0800 Subject: [PATCH 48/50] fix compile after rebase --- .../spark/sql/sources/v2/reader/ContinuousReader.java | 6 ++++++ .../spark/sql/sources/v2/reader/MicroBatchReader.java | 6 ++++++ .../org/apache/spark/sql/execution/SparkStrategies.scala | 1 + .../sql/execution/streaming/BaseStreamingSource.java | 8 -------- .../sql/execution/streaming/MicroBatchExecution.scala | 2 +- .../streaming/continuous/ContinuousRateStreamSource.scala | 2 +- .../execution/streaming/sources/RateStreamSourceV2.scala | 2 +- .../org/apache/spark/sql/streaming/DataStreamWriter.scala | 1 + .../scala/org/apache/spark/sql/streaming/StreamTest.scala | 1 + .../spark/sql/streaming/continuous/ContinuousSuite.scala | 1 + 10 files changed, 19 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ContinuousReader.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ContinuousReader.java index 1baf82c2df762..34141d6cd85fd 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ContinuousReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ContinuousReader.java @@ -65,4 +65,10 @@ public interface ContinuousReader extends BaseStreamingSource, DataSourceV2Reade default boolean needsReconfiguration() { return false; } + + /** + * Informs the source that Spark has completed processing all data for offsets less than or + * equal to `end` and will only request offsets greater than `end` in the future. + */ + void commit(Offset end); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/MicroBatchReader.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/MicroBatchReader.java index 438e3f55b7bcf..bd15c07d87f6c 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/MicroBatchReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/MicroBatchReader.java @@ -61,4 +61,10 @@ public interface MicroBatchReader extends DataSourceV2Reader, BaseStreamingSourc * @throws IllegalArgumentException if the JSON does not encode a valid offset for this reader */ Offset deserializeOffset(String json); + + /** + * Informs the source that Spark has completed processing all data for offsets less than or + * equal to `end` and will only request offsets greater than `end` in the future. 
+ */ + void commit(Offset end); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 4bfd13610b6d6..8c6c324d456c7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.sources.MemoryPlanV2 import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.StreamingQuery import org.apache.spark.sql.types.StructType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSource.java b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSource.java index 3a02cbfe7afe3..c44b8af2552f0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSource.java +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSource.java @@ -17,8 +17,6 @@ package org.apache.spark.sql.execution.streaming; -import org.apache.spark.sql.sources.v2.reader.Offset; - /** * The shared interface between V1 streaming sources and V2 streaming readers. * @@ -26,12 +24,6 @@ * directly, and will be removed in future versions. */ public interface BaseStreamingSource { - /** - * Informs the source that Spark has completed processing all data for offsets less than or - * equal to `end` and will only request offsets greater than `end` in the future. - */ - void commit(Offset end); - /** Stop this source and free any resources it has allocated. 
*/ void stop(); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 7b1d4a7149e21..20f9810faa5c8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -316,7 +316,7 @@ class MicroBatchExecution( val prevBatchOff = offsetLog.get(currentBatchId - 1) if (prevBatchOff.isDefined) { prevBatchOff.get.toStreamProgress(sources).foreach { - case (src, off) => src.commit(off) + case (src: Source, off) => src.commit(off) } } else { throw new IllegalStateException(s"batch $currentBatchId doesn't exist") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala index 1ce22c994aa8e..89a8562b4b59e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala @@ -25,7 +25,7 @@ import org.json4s.jackson.Serialization import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.streaming.{RateSourceProvider, RateStreamOffset} +import org.apache.spark.sql.execution.streaming.{RateSourceProvider, RateStreamOffset, ValueRunTimeMsPair} import org.apache.spark.sql.execution.streaming.sources.RateStreamSourceV2 import org.apache.spark.sql.sources.v2.{ContinuousReadSupport, DataSourceV2, DataSourceV2Options} import org.apache.spark.sql.sources.v2.reader._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala index 7687dd146802d..1c66aed8690a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala @@ -27,7 +27,7 @@ import org.json4s.jackson.Serialization import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.streaming.RateStreamOffset +import org.apache.spark.sql.execution.streaming.{RateStreamOffset, ValueRunTimeMsPair} import org.apache.spark.sql.sources.v2.DataSourceV2Options import org.apache.spark.sql.sources.v2.reader._ import org.apache.spark.sql.types.{LongType, StructField, StructType, TimestampType} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 573ca9a85ccaa..db588ae282f38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger +import 
org.apache.spark.sql.execution.streaming.sources.{MemoryPlanV2, MemorySinkV2} /** * Interface used to write a streaming `Dataset` to external storage systems (e.g. file systems, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 8ac92dfe1951c..08d7fde2ac673 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous.{ContinuousExecution, EpochCoordinatorRef, IncrementAndGetEpoch} +import org.apache.spark.sql.execution.streaming.sources.MemorySinkV2 import org.apache.spark.sql.execution.streaming.state.StateStore import org.apache.spark.sql.streaming.StreamingQueryListener._ import org.apache.spark.sql.test.SharedSQLContext diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index c59943888d0ef..46003f7cd1bba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.execution.command.ExplainCommand import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanExec, WriteToDataSourceV2Exec} import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous._ +import org.apache.spark.sql.execution.streaming.sources.MemorySinkV2 import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreConf, StateStoreId, StateStoreProvider} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf From 07a9e0654df61ad52f7db28ee663a380dee3c2a8 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Thu, 21 Dec 2017 15:37:34 -0800 Subject: [PATCH 49/50] address review comments --- .../apache/spark/sql/streaming/Trigger.java | 2 +- .../datasources/v2/WriteToDataSourceV2.scala | 6 +- .../execution/streaming/StreamExecution.scala | 2 +- .../continuous/ContinuousExecution.scala | 24 +++++--- .../continuous/EpochCoordinator.scala | 9 +-- .../org/apache/spark/sql/QueryTest.scala | 56 ++++++++++++------- .../spark/sql/streaming/StreamTest.scala | 12 ++++ .../continuous/ContinuousSuite.scala | 40 ++++--------- 8 files changed, 84 insertions(+), 67 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java b/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java index 9e3ad5033054a..33ae9a9e87668 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java +++ b/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java @@ -19,10 +19,10 @@ import java.util.concurrent.TimeUnit; -import org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger; import scala.concurrent.duration.Duration; import org.apache.spark.annotation.InterfaceStability; +import org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger; import org.apache.spark.sql.execution.streaming.OneTimeTrigger$; /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala index aedc09d121cac..1862da8892cb2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala @@ -131,7 +131,9 @@ object DataWritingSparkTask extends Logging { context: TaskContext, iter: Iterator[InternalRow]): WriterCommitMessage = { val dataWriter = writeTask.createDataWriter(context.partitionId(), context.attemptNumber()) - val runId = context.getLocalProperty(ContinuousExecution.RUN_ID_KEY) + val epochCoordinator = EpochCoordinatorRef.get( + context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), + SparkEnv.get) val currentMsg: WriterCommitMessage = null var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong @@ -143,7 +145,7 @@ object DataWritingSparkTask extends Logging { logInfo(s"Writer for partition ${context.partitionId()} is committing.") val msg = dataWriter.commit() logInfo(s"Writer for partition ${context.partitionId()} committed.") - EpochCoordinatorRef.get(runId, SparkEnv.get).send( + epochCoordinator.send( CommitPartitionEpoch(context.partitionId(), currentEpoch, msg) ) currentEpoch += 1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 19c8f29c23367..3e76bf7b7ca8f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -148,7 +148,7 @@ abstract class StreamExecution( * Pretty identified string of printing in logs. Format is * If name is set "queryName [id = xyz, runId = abc]" else "[id = xyz, runId = abc]" */ - private val prettyIdString = + protected val prettyIdString = Option(name).map(_ + " ").getOrElse("") + s"[id = $id, runId = $runId]" /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 584d211a28dc4..1c35b06bd4b85 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -58,7 +58,6 @@ class ContinuousExecution( assert(queryExecutionThread eq Thread.currentThread, "logicalPlan must be initialized in StreamExecutionThread " + s"but the current thread was ${Thread.currentThread}") - var nextSourceId = 0L val toExecutionRelationMap = MutableMap[StreamingRelationV2, ContinuousExecutionRelation]() analyzedPlan.transform { case r @ StreamingRelationV2( @@ -218,8 +217,7 @@ class ContinuousExecution( // Use the parent Spark session for the endpoint since it's where this query ID is registered. 
val epochEndpoint = EpochCoordinatorRef.create( - writer.get(), reader, currentBatchId, - id.toString, runId.toString, sparkSession, SparkEnv.get) + writer.get(), reader, this, currentBatchId, sparkSession, SparkEnv.get) val epochUpdateThread = new Thread(new Runnable { override def run: Unit = { try { @@ -227,8 +225,8 @@ class ContinuousExecution( startTrigger() if (reader.needsReconfiguration()) { - stopSources() state.set(RECONFIGURING) + stopSources() if (queryExecutionThread.isAlive) { sparkSession.sparkContext.cancelJobGroup(runId.toString) queryExecutionThread.interrupt() @@ -249,7 +247,7 @@ class ContinuousExecution( return } } - }) + }, s"epoch update thread for $prettyIdString") try { epochUpdateThread.setDaemon(true) @@ -280,7 +278,11 @@ class ContinuousExecution( } val globalOffset = reader.mergeOffsets(partitionOffsets.toArray) synchronized { - offsetLog.add(epoch, OffsetSeq.fill(globalOffset)) + if (queryExecutionThread.isAlive) { + offsetLog.add(epoch, OffsetSeq.fill(globalOffset)) + } else { + return + } } } @@ -292,9 +294,13 @@ class ContinuousExecution( assert(continuousSources.length == 1, "only one continuous source supported currently") assert(offsetLog.get(epoch).isDefined, s"offset for epoch $epoch not reported before commit") synchronized { - commitLog.add(epoch) - val offset = offsetLog.get(epoch).get.offsets(0).get - committedOffsets ++= Seq(continuousSources(0) -> offset) + if (queryExecutionThread.isAlive) { + commitLog.add(epoch) + val offset = offsetLog.get(epoch).get.offsets(0).get + committedOffsets ++= Seq(continuousSources(0) -> offset) + } else { + return + } } if (minLogEntriesToMaintain < currentBatchId) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index 80db3692a09b0..fed2d8879fc48 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -77,13 +77,13 @@ private[sql] object EpochCoordinatorRef extends Logging { def create( writer: ContinuousWriter, reader: ContinuousReader, + query: ContinuousExecution, startEpoch: Long, - queryId: String, - runId: String, session: SparkSession, env: SparkEnv): RpcEndpointRef = synchronized { - val coordinator = new EpochCoordinator(writer, reader, startEpoch, queryId, session, env.rpcEnv) - val ref = env.rpcEnv.setupEndpoint(endpointName(runId), coordinator) + val coordinator = new EpochCoordinator( + writer, reader, query, startEpoch, query.id.toString(), session, env.rpcEnv) + val ref = env.rpcEnv.setupEndpoint(endpointName(query.runId.toString()), coordinator) logInfo("Registered EpochCoordinator endpoint") ref } @@ -109,6 +109,7 @@ private[sql] object EpochCoordinatorRef extends Logging { private[continuous] class EpochCoordinator( writer: ContinuousWriter, reader: ContinuousReader, + query: ContinuousExecution, startEpoch: Long, queryId: String, session: SparkSession, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index fcaca3d75b74f..9fb8be423614b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -297,31 +297,47 @@ object QueryTest { }) } + private def genError( + expectedAnswer: Seq[Row], + sparkAnswer: 
Seq[Row], + isSorted: Boolean = false): String = { + val getRowType: Option[Row] => String = row => + row.map(row => + if (row.schema == null) { + "struct<>" + } else { + s"${row.schema.catalogString}" + }).getOrElse("struct<>") + + s""" + |== Results == + |${ + sideBySide( + s"== Correct Answer - ${expectedAnswer.size} ==" +: + getRowType(expectedAnswer.headOption) +: + prepareAnswer(expectedAnswer, isSorted).map(_.toString()), + s"== Spark Answer - ${sparkAnswer.size} ==" +: + getRowType(sparkAnswer.headOption) +: + prepareAnswer(sparkAnswer, isSorted).map(_.toString())).mkString("\n") + } + """.stripMargin + } + + def includesRows( + expectedRows: Seq[Row], + sparkAnswer: Seq[Row]): Option[String] = { + if (!prepareAnswer(expectedRows, true).toSet.subsetOf(prepareAnswer(sparkAnswer, true).toSet)) { + return Some(genError(expectedRows, sparkAnswer, true)) + } + None + } + def sameRows( expectedAnswer: Seq[Row], sparkAnswer: Seq[Row], isSorted: Boolean = false): Option[String] = { if (prepareAnswer(expectedAnswer, isSorted) != prepareAnswer(sparkAnswer, isSorted)) { - val getRowType: Option[Row] => String = row => - row.map(row => - if (row.schema == null) { - "struct<>" - } else { - s"${row.schema.catalogString}" - }).getOrElse("struct<>") - - val errorMessage = - s""" - |== Results == - |${sideBySide( - s"== Correct Answer - ${expectedAnswer.size} ==" +: - getRowType(expectedAnswer.headOption) +: - prepareAnswer(expectedAnswer, isSorted).map(_.toString()), - s"== Spark Answer - ${sparkAnswer.size} ==" +: - getRowType(sparkAnswer.headOption) +: - prepareAnswer(sparkAnswer, isSorted).map(_.toString())).mkString("\n")} - """.stripMargin - return Some(errorMessage) + return Some(genError(expectedAnswer, sparkAnswer, isSorted)) } None } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 08d7fde2ac673..fb9ebc81dd750 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -171,6 +171,12 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be private def operatorName = if (lastOnly) "CheckLastBatch" else "CheckAnswer" } + case class CheckAnswerRowsContains(expectedAnswer: Seq[Row], lastOnly: Boolean = false) + extends StreamAction with StreamMustBeRunning { + override def toString: String = s"$operatorName: ${expectedAnswer.mkString(",")}" + private def operatorName = if (lastOnly) "CheckLastBatch" else "CheckAnswer" + } + case class CheckAnswerRowsByFunc(checkFunction: Row => Unit, lastOnly: Boolean) extends StreamAction with StreamMustBeRunning { override def toString: String = s"$operatorName: ${checkFunction.toString()}" @@ -627,6 +633,12 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be error => failTest(error) } + case CheckAnswerRowsContains(expectedAnswer, lastOnly) => + val sparkAnswer = fetchStreamAnswer(currentStream, lastOnly) + QueryTest.includesRows(expectedAnswer, sparkAnswer).foreach { + error => failTest(error) + } + case CheckAnswerRowsByFunc(checkFunction, lastOnly) => val sparkAnswer = fetchStreamAnswer(currentStream, lastOnly) sparkAnswer.foreach { row => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 46003f7cd1bba..eda0d8ad48313 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -92,13 +92,13 @@ class ContinuousSuite extends ContinuousSuiteBase { AwaitEpoch(0), Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 10): _*), + CheckAnswerRowsContains(scala.Range(0, 10).map(Row(_))), StopStream, StartStream(longContinuousTrigger), AwaitEpoch(2), Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 20): _*), + CheckAnswerRowsContains(scala.Range(0, 20).map(Row(_))), StopStream) } @@ -118,7 +118,7 @@ class ContinuousSuite extends ContinuousSuiteBase { IncrementEpoch(), Execute(waitForRateSourceTriggers(_, 4)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 40, 2): _*)) + CheckAnswerRowsContains(scala.Range(0, 40, 2).map(Row(_)))) } test("flatMap") { @@ -137,7 +137,7 @@ class ContinuousSuite extends ContinuousSuiteBase { IncrementEpoch(), Execute(waitForRateSourceTriggers(_, 4)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 20).flatMap(n => Seq(0, n, n * 2)): _*)) + CheckAnswerRowsContains(scala.Range(0, 20).flatMap(n => Seq(0, n, n * 2)).map(Row(_)))) } test("filter") { @@ -156,7 +156,7 @@ class ContinuousSuite extends ContinuousSuiteBase { IncrementEpoch(), Execute(waitForRateSourceTriggers(_, 4)), IncrementEpoch(), - CheckAnswer(scala.Range(6, 20): _*)) + CheckAnswerRowsContains(scala.Range(6, 20).map(Row(_)))) } test("deduplicate") { @@ -205,7 +205,7 @@ class ContinuousSuite extends ContinuousSuiteBase { AwaitEpoch(0), Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 10): _*), + CheckAnswerRowsContains(scala.Range(0, 10).map(Row(_))), StopStream, StartStream(longContinuousTrigger), StopStream, AwaitEpoch(2), Execute(waitForRateSourceTriggers(_, 2)), IncrementEpoch(), - CheckAnswer(scala.Range(0, 20): _*), + CheckAnswerRowsContains(scala.Range(0, 20).map(Row(_))), StopStream) } @@ -238,7 +238,7 @@ class ContinuousSuite extends ContinuousSuiteBase { query.stop() val results = spark.read.table("noharness").collect() - assert(results.toSet == Set(0, 1, 2, 3).map(Row(_))) + assert(Set(0, 1, 2, 3).map(Row(_)).subsetOf(results.toSet)) } } @@ -280,17 +280,7 @@ class ContinuousStressSuite extends ContinuousSuiteBase { AwaitEpoch(0), Execute(waitForRateSourceTriggers(_, 201)), IncrementEpoch(), - Execute { query => - // Because we have automatic advancement, we can't reliably guarantee another trigger won't - // commit more than the 100K rows we expect before we can check. So we simply ensure that: - // * the highest value committed was at least 100000 - 1 - // * all values below the highest are present - val data = query.sink.asInstanceOf[MemorySinkV2].allData - val vals = data.map(_.getLong(0)).toSet - assert(scala.Range(0, 25000).forall { i => - vals.contains(i) - }) - }) + CheckAnswerRowsContains(scala.Range(0, 25000).map(Row(_)))) } test("restarts") { @@ -321,16 +311,6 @@ class ContinuousStressSuite extends ContinuousSuiteBase { StopStream, StartStream(Trigger.Continuous(2012)), AwaitEpoch(50), - Execute { query => - // Because we have automatic advancement, we can't reliably check where precisely the last - // commit happened. And we don't have exactly once processing, meaning values may be - // duplicated.
So we just check all values below the highest are present, and as a - // sanity check that we got at least up to the 50th trigger. - val data = query.sink.asInstanceOf[MemorySinkV2].allData - val vals = data.map(_.getLong(0)).sorted - scala.Range(0, 25000).foreach { i => - assert(vals.contains(i), s"$i was missing from result data") - } - }) + CheckAnswerRowsContains(scala.Range(0, 25000).map(Row(_)))) } } From b4f79762c083735011bf98250c39c263876c8cc8 Mon Sep 17 00:00:00 2001 From: Jose Torres Date: Fri, 22 Dec 2017 19:22:43 -0800 Subject: [PATCH 50/50] remove unnecessary queryId --- .../execution/streaming/continuous/EpochCoordinator.scala | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index fed2d8879fc48..7f1e8abd79b99 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -82,7 +82,7 @@ private[sql] object EpochCoordinatorRef extends Logging { session: SparkSession, env: SparkEnv): RpcEndpointRef = synchronized { val coordinator = new EpochCoordinator( - writer, reader, query, startEpoch, query.id.toString(), session, env.rpcEnv) + writer, reader, query, startEpoch, session, env.rpcEnv) val ref = env.rpcEnv.setupEndpoint(endpointName(query.runId.toString()), coordinator) logInfo("Registered EpochCoordinator endpoint") ref @@ -111,7 +111,6 @@ private[continuous] class EpochCoordinator( reader: ContinuousReader, query: ContinuousExecution, startEpoch: Long, - queryId: String, session: SparkSession, override val rpcEnv: RpcEnv) extends ThreadSafeRpcEndpoint with Logging { @@ -137,8 +136,6 @@ private[continuous] class EpochCoordinator( if (thisEpochCommits.size == numWriterPartitions && nextEpochOffsets.size == numReaderPartitions) { logDebug(s"Epoch $epoch has received commits from all partitions. Committing globally.") - val query = session.streams.get(queryId).asInstanceOf[StreamingQueryWrapper] - .streamingQuery.asInstanceOf[ContinuousExecution] // Sequencing is important here. We must commit to the writer before recording the commit // in the query, or we will end up dropping the commit if we restart in the middle. writer.commit(epoch, thisEpochCommits.toArray) @@ -163,8 +160,6 @@ private[continuous] class EpochCoordinator( } case ReportPartitionOffset(partitionId, epoch, offset) => - val query = session.streams.get(queryId).asInstanceOf[StreamingQueryWrapper] - .streamingQuery.asInstanceOf[ContinuousExecution] partitionOffsets.put((epoch, partitionId), offset) val thisEpochOffsets = partitionOffsets.collect { case ((e, _), o) if e == epoch => o }
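// ------------------------------------------------------------------------------------
// [Editor's note -- illustrative sketch only, not part of the patch series; names and
// types are simplified stand-ins.] Two changes earlier in this series reshape the
// reader-side iterator: the blocking queue is drained with a bounded poll (so failure
// and interrupt flags can be rechecked about once a second) and, per the "fulfil
// contract for iterator" commit, the polled entry is cached so hasNext() is idempotent
// and next() can fail cleanly. A (null, null) entry marks an epoch boundary. A
// self-contained model of that shape:
import java.util.concurrent.{ArrayBlockingQueue, TimeUnit}

object EpochMarkerIteratorSketch {
  // Stand-in for (UnsafeRow, PartitionOffset).
  type Entry = (String, String)

  def main(args: Array[String]): Unit = {
    val queue = new ArrayBlockingQueue[Entry](16)
    Seq[Entry](("row-0", "offset-0"), ("row-1", "offset-1"), (null, null)).foreach(queue.put)

    val rows = new Iterator[String] {
      private val pollTimeoutMs = 1000
      private var currentEntry: Entry = _
      private var epochDone = false

      override def hasNext: Boolean = {
        while (currentEntry == null && !epochDone) {
          // Bounded poll instead of take(); the real code rechecks failure/interrupt
          // state between attempts.
          currentEntry = queue.poll(pollTimeoutMs, TimeUnit.MILLISECONDS)
        }
        currentEntry match {
          case null => false
          case (null, null) =>
            // Epoch boundary marker: the real code reports the partition offset here.
            epochDone = true
            currentEntry = null
            false
          case _ => true
        }
      }

      override def next(): String = {
        if (!hasNext) throw new NoSuchElementException("no current row was set")
        val row = currentEntry._1
        currentEntry = null
        row
      }
    }

    rows.foreach(println) // prints row-0 and row-1, then stops at the epoch marker
  }
}
// ------------------------------------------------------------------------------------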