From c17a8ff52377871ab4ff96b648ebaf4112f0b5be Mon Sep 17 00:00:00 2001
From: Shixiong Zhu
Date: Sat, 25 Aug 2018 09:17:40 -0700
Subject: [PATCH] [SPARK-25214][SS][FOLLOWUP] Fix the issue that Kafka v2
 source may return duplicated records when `failOnDataLoss=false`

## What changes were proposed in this pull request?

This is a follow-up PR for #22207 to fix a potentially flaky test. `processAllAvailable` doesn't work for continuous processing, so we should not use it for a continuous query.

## How was this patch tested?

Jenkins.

Closes #22230 from zsxwing/SPARK-25214-2.

Authored-by: Shixiong Zhu
Signed-off-by: Shixiong Zhu
---
 .../spark/sql/kafka010/KafkaDontFailOnDataLossSuite.scala | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDontFailOnDataLossSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDontFailOnDataLossSuite.scala
index 0ff341c1a3db7..39c4e3fda1a4b 100644
--- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDontFailOnDataLossSuite.scala
+++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDontFailOnDataLossSuite.scala
@@ -80,7 +80,7 @@ trait KafkaMissingOffsetsTest extends SharedSQLContext {
   }
 }
 
-class KafkaDontFailOnDataLossSuite extends KafkaMissingOffsetsTest {
+class KafkaDontFailOnDataLossSuite extends StreamTest with KafkaMissingOffsetsTest {
 
   import testImplicits._
 
@@ -165,7 +165,11 @@ class KafkaDontFailOnDataLossSuite extends KafkaMissingOffsetsTest {
       .trigger(Trigger.Continuous(100))
       .start()
     try {
-      query.processAllAvailable()
+      // `processAllAvailable` doesn't work for continuous processing, so just wait until the last
+      // record appears in the table.
+      eventually(timeout(streamingTimeout)) {
+        assert(spark.table(table).as[String].collect().contains("49"))
+      }
     } finally {
      query.stop()
     }
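
For illustration, here is a minimal standalone sketch of the waiting pattern the fix adopts: for a continuous-trigger query, poll the sink with ScalaTest's `eventually` rather than calling `processAllAvailable`. It is not part of the patch; the broker address, topic name, memory-sink name, and the 60-second timeout are placeholder assumptions standing in for the suite's `KafkaMissingOffsetsTest` harness and its `streamingTimeout`, and the expected record "49" follows the patched test.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.Trigger
import org.scalatest.concurrent.Eventually
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.time.SpanSugar._

class ContinuousQueryWaitSketch extends AnyFunSuite with Eventually {

  test("wait for a continuous query by polling its sink") {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("continuous-wait-sketch")
      .getOrCreate()
    import spark.implicits._

    val table = "kafka_continuous_sink" // hypothetical memory-sink name

    // Read from Kafka with failOnDataLoss=false and write to an in-memory
    // table, mirroring the shape of the test in the diff above.
    val query = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "localhost:9092") // assumed broker
      .option("subscribe", "test-topic")                   // assumed topic
      .option("failOnDataLoss", "false")
      .load()
      .selectExpr("CAST(value AS STRING)")
      .writeStream
      .format("memory")
      .queryName(table)
      .trigger(Trigger.Continuous(100))
      .start()

    try {
      // `processAllAvailable` doesn't work for continuous processing, so poll
      // the sink until the last expected record ("49" in the patched test,
      // assumed here to have been produced to the topic) shows up.
      eventually(timeout(60.seconds)) { // stands in for `streamingTimeout`
        assert(spark.table(table).as[String].collect().contains("49"))
      }
    } finally {
      query.stop()
      spark.stop()
    }
  }
}
```

Running this for real additionally requires the `spark-sql-kafka-0-10` artifact on the classpath and a reachable broker holding the expected records; in the actual suite those are provided by the `KafkaMissingOffsetsTest` setup.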