From efde0e9b778d5dff6602a2e18d47c06108136157 Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Fri, 27 Apr 2018 12:39:35 +0530
Subject: [PATCH 01/10] Porting some changes from spark build, to fix my local
 bahir build

---
 pom.xml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/pom.xml b/pom.xml
index dc54de19..41ae9085 100644
--- a/pom.xml
+++ b/pom.xml
@@ -407,7 +407,7 @@
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-enforcer-plugin</artifactId>
-          <version>1.4.1</version>
+          <version>3.0.0-M1</version>
           <executions>
             <execution>
               <id>enforce-versions</id>
@@ -433,6 +433,7 @@
                       -->
                       <exclude>org.jboss.netty</exclude>
                       <exclude>org.codehaus.groovy</exclude>
+                      <exclude>*:*_2.10</exclude>
                     </excludes>
                     <searchTransitive>true</searchTransitive>
                   </bannedDependencies>
@@ -482,7 +483,8 @@
         <plugin>
           <groupId>net.alchim31.maven</groupId>
           <artifactId>scala-maven-plugin</artifactId>
-          <version>3.3.1</version>
+          <!-- 3.3.1 won't work with zinc; fails to find javac from java.home -->
+          <version>3.2.2</version>
           <executions>
             <execution>
               <id>eclipse-add-source</id>

From 37487c131f602c2879daf5760396f40cab306f11 Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Wed, 9 May 2018 12:54:17 +0530
Subject: [PATCH 02/10] Migrating Mqtt spark structured streaming connector to
 DatasourceV2 API.

---
 pom.xml                                       |  10 +-
 .../akka/AkkaStreamSourceSuite.scala          |   2 +-
 .../streaming/mqtt/MQTTStreamWordCount.scala  |   2 +-
 .../sql/streaming/mqtt/MQTTStreamSource.scala | 236 +++++++++++-------
 .../sql/streaming/mqtt/MessageStore.scala     |  90 ++++---
 .../mqtt/LocalMessageStoreSuite.scala         |   6 +-
 .../mqtt/MQTTStreamSourceSuite.scala          |  52 ++--
 .../sql/streaming/mqtt/MQTTTestUtils.scala    |  11 +-
 8 files changed, 253 insertions(+), 156 deletions(-)

diff --git a/pom.xml b/pom.xml
index 41ae9085..e00d9be8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,7 +99,7 @@
     <log4j.version>1.2.17</log4j.version>
 
     <!-- Spark version -->
-    <spark.version>2.2.0</spark.version>
+    <spark.version>2.3.0</spark.version>
 
     <!-- MQTT Client -->
     <mqtt.paho.client>1.1.0</mqtt.paho.client>
@@ -348,13 +348,13 @@
       <dependency>
         <groupId>org.scalatest</groupId>
         <artifactId>scalatest_${scala.binary.version}</artifactId>
-        <version>2.2.6</version>
+        <version>3.0.3</version>
         <scope>test</scope>
       </dependency>
       <dependency>
         <groupId>org.scalacheck</groupId>
         <artifactId>scalacheck_${scala.binary.version}</artifactId>
-        <version>1.12.5</version> <!-- 1.13.0 appears incompatible with scalatest 2.2.6 -->
+        <version>1.13.5</version>
         <scope>test</scope>
       </dependency>
 
@@ -559,7 +559,7 @@
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-surefire-plugin</artifactId>
-          <version>2.19.1</version>
+          <version>2.20.1</version>
           <!-- Note config is repeated in scalatest config -->
           <configuration>
             <includes>
@@ -569,7 +569,7 @@
               <include>**/*Suite.java</include>
             </includes>
             <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-            <argLine>-Xmx3g -Xss4096k -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
+            <argLine>-ea -Xmx3g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
             <environmentVariables>
               <!--
                 Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
diff --git a/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala b/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
index 5e9b86e9..cdf629b6 100644
--- a/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
+++ b/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
@@ -155,7 +155,7 @@ class StressTestAkkaSource extends AkkaStreamSourceSuite {
 
   // Run with -Xmx1024m
   // Default allowed payload size sent to an akka actor is 128000 bytes.
-  test("Send & Receive messages of size 128000 bytes.") {
+  ignore("Send & Receive messages of size 128000 bytes.") {
 
     val freeMemory: Long = Runtime.getRuntime.freeMemory()
 
diff --git a/sql-streaming-mqtt/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/mqtt/MQTTStreamWordCount.scala b/sql-streaming-mqtt/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/mqtt/MQTTStreamWordCount.scala
index 237a8fa2..a573744a 100644
--- a/sql-streaming-mqtt/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/mqtt/MQTTStreamWordCount.scala
+++ b/sql-streaming-mqtt/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/mqtt/MQTTStreamWordCount.scala
@@ -52,7 +52,7 @@ object MQTTStreamWordCount  {
     // Create DataFrame representing the stream of input lines from connection to mqtt server
     val lines = spark.readStream
       .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
-      .option("topic", topic)
+      .option("topic", topic).option("persistence", "memory")
       .load(brokerUrl).as[(String, Timestamp)]
 
     // Split the lines into words
diff --git a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
index 1739ff33..b198ca95 100644
--- a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
+++ b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
@@ -20,19 +20,23 @@ package org.apache.bahir.sql.streaming.mqtt
 import java.nio.charset.Charset
 import java.sql.Timestamp
 import java.text.SimpleDateFormat
-import java.util.Calendar
-import java.util.concurrent.CountDownLatch
+import java.util.{Calendar, Optional}
+import javax.annotation.concurrent.GuardedBy
 
+import scala.collection.JavaConverters._
 import scala.collection.concurrent.TrieMap
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.immutable.IndexedSeq
+import scala.collection.mutable.ListBuffer
 import scala.util.{Failure, Success, Try}
 
 import org.eclipse.paho.client.mqttv3._
 import org.eclipse.paho.client.mqttv3.persist.{MemoryPersistence, MqttDefaultFilePersistence}
 
-import org.apache.spark.sql.{DataFrame, SQLContext}
-import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
-import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider}
+import org.apache.spark.sql._
+import org.apache.spark.sql.sources.DataSourceRegister
+import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, MicroBatchReadSupport}
+import org.apache.spark.sql.sources.v2.reader.{DataReader, DataReaderFactory}
+import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchReader, Offset => OffsetV2}
 import org.apache.spark.sql.types.{StringType, StructField, StructType, TimestampType}
 
 import org.apache.bahir.utils.Logging
@@ -60,36 +64,41 @@ object MQTTStreamConstants {
  * @param clientId clientId, this client is assoicated with. Provide the same value to recover
  *                 a stopped client.
  * @param messageParser parsing logic for processing incoming messages from Mqtt Server.
- * @param sqlContext Spark provided, SqlContext.
  * @param mqttConnectOptions an instance of MqttConnectOptions for this Source.
  * @param qos the maximum quality of service to subscribe each topic at.Messages published at
  *            a lower quality of service will be received at the published QoS. Messages
  *            published at a higher quality of service will be received using the QoS specified
  *            on the subscribe.
  */
-class MQTTTextStreamSource(brokerUrl: String, persistence: MqttClientPersistence,
-    topic: String, clientId: String, messageParser: Array[Byte] => (String, Timestamp),
-    sqlContext: SQLContext, mqttConnectOptions: MqttConnectOptions, qos: Int)
-  extends Source with Logging {
+class MQTTTextStreamSource(options: DataSourceOptions, brokerUrl: String, persistence:
+    MqttClientPersistence, topic: String, clientId: String,
+    messageParser: Array[Byte] => (String, Timestamp),
+    mqttConnectOptions: MqttConnectOptions, qos: Int)
+  extends MicroBatchReader with Logging {
 
-  override def schema: StructType = MQTTStreamConstants.SCHEMA_DEFAULT
+  private var startOffset: OffsetV2 = _
+  private var endOffset: OffsetV2 = _
 
-  private val store = new LocalMessageStore(persistence, sqlContext.sparkContext.getConf)
 
-  private val messages = new TrieMap[Int, (String, Timestamp)]
+  private val store = new LocalMessageStore(persistence)
 
-  private val initLock = new CountDownLatch(1)
+  private val messages = new TrieMap[Long, (String, Timestamp)]
+
+  @GuardedBy("this")
+  private var currentOffset: LongOffset = LongOffset(-1L)
+
+  @GuardedBy("this")
+  private var lastOffsetCommitted: LongOffset = LongOffset(-1L)
 
-  private var offset = 0
 
   private var client: MqttClient = _
 
-  private def fetchLastProcessedOffset(): Int = {
+  private def fetchLastProcessedOffset(): LongOffset = {
     Try(store.maxProcessedOffset) match {
       case Success(x) =>
         log.info(s"Recovering from last stored offset $x")
-        x
-      case Failure(e) => 0
+        LongOffset(x)
+      case Failure(e) => LongOffset(-1L)
     }
   }
 
@@ -97,15 +106,13 @@ class MQTTTextStreamSource(brokerUrl: String, persistence: MqttClientPersistence
   private def initialize(): Unit = {
 
     client = new MqttClient(brokerUrl, clientId, persistence)
-
     val callback = new MqttCallbackExtended() {
 
       override def messageArrived(topic_ : String, message: MqttMessage): Unit = synchronized {
-        initLock.await() // Wait for initialization to complete.
-        val temp = offset + 1
-        messages.put(temp, messageParser(message.getPayload))
-        offset = temp
-        log.trace(s"Message arrived, $topic_ $message")
+        val offset = currentOffset.offset + 1L
+        messages.put(offset, messageParser(message.getPayload))
+        currentOffset = LongOffset(offset)
+        log.info(s"Message arrived, $topic_ $message")
       }
 
       override def deliveryComplete(token: IMqttDeliveryToken): Unit = {
@@ -121,99 +128,151 @@ class MQTTTextStreamSource(brokerUrl: String, persistence: MqttClientPersistence
     }
     client.setCallback(callback)
     client.connect(mqttConnectOptions)
-    client.subscribe(topic, qos)
     // It is not possible to initialize offset without `client.connect`
-    offset = fetchLastProcessedOffset()
-    initLock.countDown() // Release.
+    lastOffsetCommitted = fetchLastProcessedOffset()
+    client.subscribe(topic, qos)
   }
 
-  /** Stop this source and free any resources it has allocated. */
-  override def stop(): Unit = {
-    client.disconnect()
-    persistence.close()
-    client.close()
+  override def setOffsetRange(
+      start: Optional[OffsetV2], end: Optional[OffsetV2]): Unit = synchronized {
+    startOffset = start.orElse(LongOffset(-1L))
+    endOffset = end.orElse(currentOffset)
   }
 
-  /** Returns the maximum available offset for this source. */
-  override def getOffset: Option[Offset] = {
-    if (offset == 0) {
-      None
-    } else {
-      Some(LongOffset(offset))
-    }
+  override def getStartOffset(): OffsetV2 = {
+    Option(startOffset).getOrElse(throw new IllegalStateException("start offset not set"))
+  }
+
+  override def getEndOffset(): OffsetV2 = {
+    Option(endOffset).getOrElse(throw new IllegalStateException("end offset not set"))
+  }
+
+  override def deserializeOffset(json: String): OffsetV2 = {
+    LongOffset(json.toLong)
+  }
+
+  override def readSchema(): StructType = {
+    MQTTStreamConstants.SCHEMA_DEFAULT
   }
 
-  /**
-   * Returns the data that is between the offsets (`start`, `end`]. When `start` is `None` then
-   * the batch should begin with the first available record. This method must always return the
-   * same data for a particular `start` and `end` pair.
-   */
-  override def getBatch(start: Option[Offset], end: Offset): DataFrame = synchronized {
-    val startIndex = start.getOrElse(LongOffset(0L)).asInstanceOf[LongOffset].offset.toInt
-    val endIndex = end.asInstanceOf[LongOffset].offset.toInt
-    val data: ArrayBuffer[(String, Timestamp)] = ArrayBuffer.empty
-    // Move consumed messages to persistent store.
-    (startIndex + 1 to endIndex).foreach { id =>
-      val element: (String, Timestamp) = messages.getOrElse(id, store.retrieve(id))
-      data += element
-      store.store(id, element)
-      messages.remove(id, element)
+  override def createDataReaderFactories(): java.util.List[DataReaderFactory[Row]] = {
+    // Internal buffer only holds the batches after lastOffsetCommitted
+    val rawList: IndexedSeq[(String, Timestamp)] = synchronized {
+      val sliceStart = LongOffset.convert(startOffset).get.offset + 1
+      val sliceEnd = LongOffset.convert(endOffset).get.offset + 1
+      for ( i <- sliceStart until sliceEnd) yield messages(i)
     }
-    log.trace(s"Get Batch invoked, ${data.mkString}")
-    import sqlContext.implicits._
-    data.toDF("value", "timestamp")
+
+    val spark = SparkSession.getActiveSession.get
+    val numPartitions = spark.sparkContext.defaultParallelism
+
+    val slices = Array.fill(numPartitions)(new ListBuffer[(String, Timestamp)])
+    rawList.zipWithIndex.foreach { case (r, idx) =>
+      slices(idx % numPartitions).append(r)
+    }
+
+    (0 until numPartitions).map { i =>
+      val slice = slices(i)
+      new DataReaderFactory[Row] {
+        override def createDataReader(): DataReader[Row] = new DataReader[Row] {
+          private var currentIdx = -1
+
+          override def next(): Boolean = {
+            currentIdx += 1
+            currentIdx < slice.size
+          }
+
+          override def get(): Row = {
+            Row(slice(currentIdx)._1, slice(currentIdx)._2)
+          }
+
+          override def close(): Unit = {}
+        }
+      }
+    }.toList.asJava
   }
 
-}
+  override def commit(end: OffsetV2): Unit = synchronized {
+    val newOffset = LongOffset.convert(end).getOrElse(
+      sys.error(s"MQTTStreamSource.commit() received an offset ($end) that did not " +
+        s"originate with an instance of this class")
+    )
 
-class MQTTStreamSourceProvider extends StreamSourceProvider with DataSourceRegister with Logging {
+    val offsetDiff = (newOffset.offset - lastOffsetCommitted.offset).toInt
+
+    if (offsetDiff < 0) {
+      sys.error(s"Offsets committed out of order: $lastOffsetCommitted followed by $end")
+    }
+
+    (lastOffsetCommitted.offset until newOffset.offset).foreach { x =>
+      messages.remove(x + 1)
+    }
+    lastOffsetCommitted = newOffset
+  }
 
-  override def sourceSchema(sqlContext: SQLContext, schema: Option[StructType],
-      providerName: String, parameters: Map[String, String]): (String, StructType) = {
-    ("mqtt", MQTTStreamConstants.SCHEMA_DEFAULT)
+  /** Stop this source. */
+  override def stop(): Unit = synchronized {
+    client.disconnect()
+    persistence.close()
+    client.close()
   }
 
-  override def createSource(sqlContext: SQLContext, metadataPath: String,
-      schema: Option[StructType], providerName: String, parameters: Map[String, String]): Source = {
+  override def toString: String = s"MQTTStreamSource[brokerUrl: $brokerUrl, topic: $topic" +
+    s" clientId: $clientId]"
+}
+
+class MQTTStreamSourceProvider extends DataSourceV2
+  with MicroBatchReadSupport with DataSourceRegister with Logging {
 
+  override def createMicroBatchReader(schema: Optional[StructType],
+      checkpointLocation: String, parameters: DataSourceOptions): MicroBatchReader = {
+    if (schema.isPresent) {
+      throw
+        new IllegalArgumentException("The mqtt source does not support a user-specified schema.")
+    }
     def e(s: String) = new IllegalArgumentException(s)
 
-    val brokerUrl: String = parameters.getOrElse("brokerUrl", parameters.getOrElse("path",
-      throw e("Please provide a `brokerUrl` by specifying path or .options(\"brokerUrl\",...)")))
+    val brokerUrl = parameters.get("brokerUrl").orElse(parameters.get("path").orElse(null))
 
+    if (brokerUrl == null) {
+      throw e("Please provide a broker url, with option(\"brokerUrl\", ...).")
+    }
 
-    val persistence: MqttClientPersistence = parameters.get("persistence") match {
-      case Some("memory") => new MemoryPersistence()
-      case _ => val localStorage: Option[String] = parameters.get("localStorage")
+    val persistence: MqttClientPersistence = parameters.get("persistence").orElse("") match {
+      case "memory" => new MemoryPersistence()
+      case _ => val localStorage: String = parameters.get("localStorage").orElse("")
         localStorage match {
-          case Some(x) => new MqttDefaultFilePersistence(x)
-          case None => new MqttDefaultFilePersistence()
+          case x => new MqttDefaultFilePersistence(x)
+          case _ => new MqttDefaultFilePersistence()
         }
     }
 
     val messageParserWithTimeStamp = (x: Array[Byte]) =>
       (new String(x, Charset.defaultCharset()), Timestamp.valueOf(
-      MQTTStreamConstants.DATE_FORMAT.format(Calendar.getInstance().getTime)))
+        MQTTStreamConstants.DATE_FORMAT.format(Calendar.getInstance().getTime)))
 
     // if default is subscribe everything, it leads to getting lot unwanted system messages.
-    val topic: String = parameters.getOrElse("topic",
-      throw e("Please specify a topic, by .options(\"topic\",...)"))
+    val topic: String = parameters.get("topic").orElse(null)
+    if (topic == null) {
+      throw e("Please specify a topic, by .options(\"topic\",...)")
+    }
 
-    val clientId: String = parameters.getOrElse("clientId", {
+    val clientId: String = parameters.get("clientId").orElse {
       log.warn("If `clientId` is not set, a random value is picked up." +
-        "\nRecovering from failure is not supported in such a case.")
-      MqttClient.generateClientId()})
+        " Recovering from failure is not supported in such a case.")
+      MqttClient.generateClientId()}
 
-    val username: Option[String] = parameters.get("username")
-    val password: Option[String] = parameters.get("password")
-    val connectionTimeout: Int = parameters.getOrElse("connectionTimeout",
+    val username: String = parameters.get("username").orElse(null)
+    val password: String = parameters.get("password").orElse(null)
+
+    val connectionTimeout: Int = parameters.get("connectionTimeout").orElse(
       MqttConnectOptions.CONNECTION_TIMEOUT_DEFAULT.toString).toInt
-    val keepAlive: Int = parameters.getOrElse("keepAlive", MqttConnectOptions
+    val keepAlive: Int = parameters.get("keepAlive").orElse(MqttConnectOptions
       .KEEP_ALIVE_INTERVAL_DEFAULT.toString).toInt
-    val mqttVersion: Int = parameters.getOrElse("mqttVersion", MqttConnectOptions
+    val mqttVersion: Int = parameters.get("mqttVersion").orElse(MqttConnectOptions
       .MQTT_VERSION_DEFAULT.toString).toInt
-    val cleanSession: Boolean = parameters.getOrElse("cleanSession", "false").toBoolean
-    val qos: Int = parameters.getOrElse("QoS", "1").toInt
+    val cleanSession: Boolean = parameters.get("cleanSession").orElse("false").toBoolean
+    val qos: Int = parameters.get("QoS").orElse("1").toInt
 
     val mqttConnectOptions: MqttConnectOptions = new MqttConnectOptions()
     mqttConnectOptions.setAutomaticReconnect(true)
@@ -222,15 +281,14 @@ class MQTTStreamSourceProvider extends StreamSourceProvider with DataSourceRegis
     mqttConnectOptions.setKeepAliveInterval(keepAlive)
     mqttConnectOptions.setMqttVersion(mqttVersion)
     (username, password) match {
-      case (Some(u: String), Some(p: String)) =>
+      case (u: String, p: String) if u != null && p != null =>
         mqttConnectOptions.setUserName(u)
         mqttConnectOptions.setPassword(p.toCharArray)
       case _ =>
     }
 
-    new MQTTTextStreamSource(brokerUrl, persistence, topic, clientId,
-      messageParserWithTimeStamp, sqlContext, mqttConnectOptions, qos)
+    new  MQTTTextStreamSource(parameters, brokerUrl, persistence, topic, clientId,
+      messageParserWithTimeStamp, mqttConnectOptions, qos)
   }
-
   override def shortName(): String = "mqtt"
 }
diff --git a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
index 84fd8c41..54b09cd7 100644
--- a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
+++ b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
@@ -18,15 +18,11 @@
 
 package org.apache.bahir.sql.streaming.mqtt
 
-import java.nio.ByteBuffer
+import java.io._
 import java.util
 
-import scala.reflect.ClassTag
-
 import org.eclipse.paho.client.mqttv3.{MqttClientPersistence, MqttPersistable, MqttPersistenceException}
-
-import org.apache.spark.SparkConf
-import org.apache.spark.serializer.{JavaSerializer, Serializer, SerializerInstance}
+import scala.util.Try
 
 import org.apache.bahir.utils.Logging
 
@@ -35,16 +31,13 @@ import org.apache.bahir.utils.Logging
 trait MessageStore {
 
   /** Store a single id and corresponding serialized message */
-  def store[T: ClassTag](id: Int, message: T): Boolean
-
-  /** Retrieve messages corresponding to certain offset range */
-  def retrieve[T: ClassTag](start: Int, end: Int): Seq[T]
+  def store[T](id: Long, message: T): Boolean
 
   /** Retrieve message corresponding to a given id. */
-  def retrieve[T: ClassTag](id: Int): T
+  def retrieve[T](id: Long): T
 
   /** Highest offset we have stored */
-  def maxProcessedOffset: Int
+  def maxProcessedOffset: Long
 
 }
 
@@ -63,6 +56,52 @@ private[mqtt] class MqttPersistableData(bytes: Array[Byte]) extends MqttPersista
   override def getPayloadLength: Int = 0
 }
 
+trait Serializer {
+
+  def deserialize[T](x: Array[Byte]): T
+
+  def serialize[T](x: T): Array[Byte]
+}
+
+class JavaSerializer extends Serializer {
+
+  override def deserialize[T](x: Array[Byte]): T = {
+    val bis = new ByteArrayInputStream(x)
+    val in = new ObjectInputStream(bis)
+    val obj = if (in != null) {
+      val o = in.readObject()
+      Try(in.close())
+      o
+    } else {
+      null
+    }
+    obj.asInstanceOf[T]
+  }
+
+  override def serialize[T](x: T): Array[Byte] = {
+    val bos = new ByteArrayOutputStream()
+    val out = new ObjectOutputStream(bos)
+    out.writeObject(x)
+    out.flush()
+    if (bos != null) {
+      val bytes: Array[Byte] = bos.toByteArray
+      Try(bos.close())
+      bytes
+    } else {
+      null
+    }
+  }
+}
+
+object JavaSerializer {
+
+  private lazy val instance = new JavaSerializer()
+
+  def getInstance(): JavaSerializer = instance
+
+}
+
+
 /**
  * A message store to persist messages received. This is not intended to be thread safe.
  * It uses `MqttDefaultFilePersistence` for storing messages on disk locally on the client.
@@ -70,44 +109,35 @@ private[mqtt] class MqttPersistableData(bytes: Array[Byte]) extends MqttPersista
 private[mqtt] class LocalMessageStore(val persistentStore: MqttClientPersistence,
     val serializer: Serializer) extends MessageStore with Logging {
 
-  val classLoader = Thread.currentThread.getContextClassLoader
-
-  def this(persistentStore: MqttClientPersistence, conf: SparkConf) =
-    this(persistentStore, new JavaSerializer(conf))
+  def this(persistentStore: MqttClientPersistence) =
+    this(persistentStore, JavaSerializer.getInstance())
 
-  val serializerInstance: SerializerInstance = serializer.newInstance()
-
-  private def get(id: Int) = {
+  private def get(id: Long) = {
     persistentStore.get(id.toString).getHeaderBytes
   }
 
   import scala.collection.JavaConverters._
 
-  def maxProcessedOffset: Int = {
+  def maxProcessedOffset: Long = {
     val keys: util.Enumeration[_] = persistentStore.keys()
     keys.asScala.map(x => x.toString.toInt).max
   }
 
   /** Store a single id and corresponding serialized message */
-  override def store[T: ClassTag](id: Int, message: T): Boolean = {
-    val bytes: Array[Byte] = serializerInstance.serialize(message).array()
+  override def store[T](id: Long, message: T): Boolean = {
+    val bytes: Array[Byte] = serializer.serialize(message)
     try {
       persistentStore.put(id.toString, new MqttPersistableData(bytes))
       true
     } catch {
       case e: MqttPersistenceException => log.warn(s"Failed to store message Id: $id", e)
-      false
+        false
     }
   }
 
-  /** Retrieve messages corresponding to certain offset range */
-  override def retrieve[T: ClassTag](start: Int, end: Int): Seq[T] = {
-    (start until end).map(x => retrieve(x))
-  }
-
   /** Retrieve message corresponding to a given id. */
-  override def retrieve[T: ClassTag](id: Int): T = {
-    serializerInstance.deserialize(ByteBuffer.wrap(get(id)), classLoader)
+  override def retrieve[T](id: Long): T = {
+    serializer.deserialize(get(id))
   }
 
 }
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
index 9c678cb6..21e857da 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
@@ -31,9 +31,9 @@ import org.apache.bahir.utils.BahirUtils
 class LocalMessageStoreSuite extends SparkFunSuite with BeforeAndAfter {
 
   private val testData = Seq(1, 2, 3, 4, 5, 6)
-  private val javaSerializer: JavaSerializer = new JavaSerializer(new SparkConf())
+  private val javaSerializer: JavaSerializer = new JavaSerializer()
 
-  private val serializerInstance = javaSerializer.newInstance()
+  private val serializerInstance = javaSerializer
   private val tempDir: File = new File(System.getProperty("java.io.tmpdir") + "/mqtt-test2/")
   private val persistence: MqttDefaultFilePersistence =
     new MqttDefaultFilePersistence(tempDir.getAbsolutePath)
@@ -68,7 +68,7 @@ class LocalMessageStoreSuite extends SparkFunSuite with BeforeAndAfter {
   test("Max offset stored") {
     store.store(1, testData)
     store.store(10, testData)
-    val offset: Int = store.maxProcessedOffset
+    val offset = store.maxProcessedOffset
     assert(offset == 10)
   }
 
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
index 38971a00..03ea213c 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
@@ -19,6 +19,7 @@ package org.apache.bahir.sql.streaming.mqtt
 
 import java.io.File
 import java.sql.Timestamp
+import java.util.Optional
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
@@ -28,8 +29,8 @@ import org.eclipse.paho.client.mqttv3.MqttException
 import org.scalatest.BeforeAndAfter
 
 import org.apache.spark.{SharedSparkContext, SparkFunSuite}
-import org.apache.spark.sql.{DataFrame, SQLContext}
-import org.apache.spark.sql.execution.streaming.LongOffset
+import org.apache.spark.sql._
+import org.apache.spark.sql.sources.v2.DataSourceOptions
 
 import org.apache.bahir.utils.BahirUtils
 
@@ -40,9 +41,13 @@ class MQTTStreamSourceSuite extends SparkFunSuite with SharedSparkContext with B
   protected val tempDir: File = new File(System.getProperty("java.io.tmpdir") + "/mqtt-test/")
 
   before {
+    tempDir.mkdirs()
+    if (!tempDir.exists()) {
+      throw new IllegalStateException("Unable to create temp directories.")
+    }
+    tempDir.deleteOnExit()
     mqttTestUtils = new MQTTTestUtils(tempDir)
     mqttTestUtils.setup()
-    tempDir.mkdirs()
   }
 
   after {
@@ -60,7 +65,7 @@ class MQTTStreamSourceSuite extends SparkFunSuite with SharedSparkContext with B
 
     val dataFrame: DataFrame =
       sqlContext.readStream.format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
-        .option("topic", "test").option("localStorage", dir).option("clientId", "clientId")
+        .option("topic", "test").option("persistence", "memory").option("clientId", "clientId")
         .option("QoS", "2").load("tcp://" + mqttTestUtils.brokerUri)
     (sqlContext, dataFrame)
   }
@@ -102,7 +107,7 @@ class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
   }
 
   // TODO: reinstate this test after fixing BAHIR-83
-  ignore("Send and receive 100 messages.") {
+  ignore("Send and receive 50 messages.") {
 
     val sendMessage = "MQTT is a message queue."
 
@@ -111,38 +116,37 @@ class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
     val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
 
     Future {
-      Thread.sleep(2000)
-      mqttTestUtils.publishData("test", sendMessage, 100)
+      Thread.sleep(4000)
+      mqttTestUtils.publishData("test", sendMessage, 50)
     }
 
-    writeStreamResults(sqlContext, dataFrame, 10000)
+    // writeStreamResults(sqlContext, dataFrame, 22000)
 
     val resultBuffer: mutable.Buffer[String] = readBackStreamingResults(sqlContext)
 
-    assert(resultBuffer.size == 100)
+    assert(resultBuffer.size == 50)
     assert(resultBuffer.head == sendMessage)
   }
 
   test("no server up") {
     val provider = new MQTTStreamSourceProvider
     val sqlContext: SQLContext = new SQLContext(sc)
-    val parameters = Map("brokerUrl" -> "tcp://localhost:1883", "topic" -> "test",
-      "localStorage" -> tmpDir)
+    val parameters = new DataSourceOptions(Map("brokerUrl" ->
+      "tcp://localhost:1881", "topic" -> "test", "localStorage" -> tmpDir).asJava)
     intercept[MqttException] {
-      provider.createSource(sqlContext, "", None, "", parameters)
+      provider.createMicroBatchReader(Optional.empty(), tempDir.toString, parameters)
     }
   }
 
   test("params not provided.") {
     val provider = new MQTTStreamSourceProvider
-    val sqlContext: SQLContext = new SQLContext(sc)
-    val parameters = Map("brokerUrl" -> mqttTestUtils.brokerUri,
-      "localStorage" -> tmpDir)
+    val parameters = new DataSourceOptions(Map("brokerUrl" -> mqttTestUtils.brokerUri,
+      "localStorage" -> tmpDir).asJava)
     intercept[IllegalArgumentException] {
-      provider.createSource(sqlContext, "", None, "", parameters)
+      provider.createMicroBatchReader(Optional.empty(), tempDir.toString, parameters)
     }
     intercept[IllegalArgumentException] {
-      provider.createSource(sqlContext, "", None, "", Map())
+      provider.createMicroBatchReader(Optional.empty(), tempDir.toString, DataSourceOptions.empty())
     }
   }
 
@@ -160,14 +164,16 @@ class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
       mqttTestUtils.publishData("test", sendMessage, 100)
     }
 
-    writeStreamResults(sqlContext, dataFrame, 10000)
+    // writeStreamResults(sqlContext, dataFrame, 10000)
     // On restarting the source with same params, it should begin from the offset - the
     // previously running stream left off.
-    val provider = new MQTTStreamSourceProvider
-    val parameters = Map("brokerUrl" -> ("tcp://" + mqttTestUtils.brokerUri), "topic" -> "test",
-      "localStorage" -> tmpDir, "clientId" -> "clientId", "QoS" -> "2")
-    val offset: Long = provider.createSource(sqlContext, "", None, "", parameters)
-      .getOffset.get.asInstanceOf[LongOffset].offset
+    val provider: MQTTStreamSourceProvider = new MQTTStreamSourceProvider
+    val parameters = new DataSourceOptions(Map("brokerUrl" -> ("tcp://" + mqttTestUtils.brokerUri),
+      "topic" -> "test", "localStorage" -> tmpDir, "clientId" -> "clientId", "QoS" -> "2").asJava)
+    val source = provider.createMicroBatchReader(Optional.empty(), tempDir.toString,
+      parameters)
+    val offset: Long = source.getEndOffset.asInstanceOf[LongOffset].offset
+    source.stop()
     assert(offset == 100L)
   }
 
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
index 9c7399f3..d717d119 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
@@ -22,7 +22,7 @@ import java.net.{ServerSocket, URI}
 
 import org.apache.activemq.broker.{BrokerService, TransportConnector}
 import org.eclipse.paho.client.mqttv3._
-import org.eclipse.paho.client.mqttv3.persist.MqttDefaultFilePersistence
+import org.eclipse.paho.client.mqttv3.persist.{MemoryPersistence, MqttDefaultFilePersistence}
 
 import org.apache.bahir.utils.Logging
 
@@ -30,7 +30,7 @@ import org.apache.bahir.utils.Logging
 class MQTTTestUtils(tempDir: File, port: Int = 0) extends Logging {
 
   private val persistenceDir = tempDir.getAbsolutePath
-  private val brokerHost = "localhost"
+  private val brokerHost = "127.0.0.1"
   private val brokerPort: Int = if (port == 0) findFreePort() else port
 
   private var broker: BrokerService = _
@@ -60,18 +60,21 @@ class MQTTTestUtils(tempDir: File, port: Int = 0) extends Logging {
   def teardown(): Unit = {
     if (broker != null) {
       broker.stop()
-      broker = null
     }
     if (connector != null) {
       connector.stop()
       connector = null
     }
+    while (!broker.isStopped) {
+      Thread.sleep(50)
+    }
+    broker = null
   }
 
   def publishData(topic: String, data: String, N: Int = 1): Unit = {
     var client: MqttClient = null
     try {
-      val persistence = new MqttDefaultFilePersistence(persistenceDir)
+      val persistence = new MemoryPersistence()
       client = new MqttClient("tcp://" + brokerUri, MqttClient.generateClientId(), persistence)
       client.connect()
       if (client.isConnected) {

From bd1125a96adbe47ebc4d6e51eb4fc623d4fc861c Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Thu, 10 May 2018 16:52:39 +0530
Subject: [PATCH 03/10] Fixed tests and added mechanism for deecting
 redelivery.

---
 .../streaming/mqtt/MQTTStreamWordCount.scala  |   4 +-
 .../sql/streaming/mqtt/MQTTStreamSource.scala | 121 ++++++++++++-----
 .../mqtt/LocalMessageStoreSuite.scala         |   3 +-
 .../mqtt/MQTTStreamSourceSuite.scala          | 126 +++++++-----------
 .../sql/streaming/mqtt/MQTTTestUtils.scala    |   5 +-
 5 files changed, 139 insertions(+), 120 deletions(-)

diff --git a/sql-streaming-mqtt/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/mqtt/MQTTStreamWordCount.scala b/sql-streaming-mqtt/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/mqtt/MQTTStreamWordCount.scala
index a573744a..ee7de22a 100644
--- a/sql-streaming-mqtt/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/mqtt/MQTTStreamWordCount.scala
+++ b/sql-streaming-mqtt/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/mqtt/MQTTStreamWordCount.scala
@@ -53,10 +53,10 @@ object MQTTStreamWordCount  {
     val lines = spark.readStream
       .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
       .option("topic", topic).option("persistence", "memory")
-      .load(brokerUrl).as[(String, Timestamp)]
+      .load(brokerUrl).selectExpr("CAST(payload AS STRING)").as[String]
 
     // Split the lines into words
-    val words = lines.map(_._1).flatMap(_.split(" "))
+    val words = lines.flatMap(_.split(" "))
 
     // Generate running word count
     val wordCounts = words.groupBy("value").count()
diff --git a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
index b198ca95..2db5ca94 100644
--- a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
+++ b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
@@ -20,7 +20,7 @@ package org.apache.bahir.sql.streaming.mqtt
 import java.nio.charset.Charset
 import java.sql.Timestamp
 import java.text.SimpleDateFormat
-import java.util.{Calendar, Optional}
+import java.util.{Calendar, HashSet => JHashSet, Optional}
 import javax.annotation.concurrent.GuardedBy
 
 import scala.collection.JavaConverters._
@@ -37,7 +37,7 @@ import org.apache.spark.sql.sources.DataSourceRegister
 import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, MicroBatchReadSupport}
 import org.apache.spark.sql.sources.v2.reader.{DataReader, DataReaderFactory}
 import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchReader, Offset => OffsetV2}
-import org.apache.spark.sql.types.{StringType, StructField, StructType, TimestampType}
+import org.apache.spark.sql.types._
 
 import org.apache.bahir.utils.Logging
 
@@ -46,15 +46,38 @@ object MQTTStreamConstants {
 
   val DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
 
-  val SCHEMA_DEFAULT = StructType(StructField("value", StringType)
-    :: StructField("timestamp", TimestampType) :: Nil)
+  val SCHEMA_DEFAULT = StructType(StructField("id", IntegerType) :: StructField("topic",
+    StringType):: StructField("payload", BinaryType) :: StructField("timestamp", TimestampType) ::
+    Nil)
 }
 
+class MQTTMessage(m: MqttMessage, val topic: String) extends Serializable {
+
+  // TODO: make it configurable.
+  val timestamp: Timestamp = Timestamp.valueOf(
+    MQTTStreamConstants.DATE_FORMAT.format(Calendar.getInstance().getTime))
+  val duplicate = m.isDuplicate
+  val retained = m.isRetained
+  val qos = m.getQos
+  val id: Int = m.getId
+
+  val payload: Array[Byte] = m.getPayload
+
+  override def toString(): String = {
+    s"""MQTTMessage.
+       |Topic: ${this.topic}
+       |MessageID: ${this.id}
+       |QoS: ${this.qos}
+       |Payload: ${this.payload}
+       |Payload as string: ${new String(this.payload, Charset.defaultCharset())}
+       |isRetained: ${this.retained}
+       |isDuplicate: ${this.duplicate}
+       |TimeStamp: ${this.timestamp}
+     """.stripMargin
+  }
+}
 /**
- * A Text based mqtt stream source, it interprets the payload of each incoming message by converting
- * the bytes to String using Charset.defaultCharset as charset. Each value is associated with a
- * timestamp of arrival of the message on the source. It can be used to operate a window on the
- * incoming stream.
+ * A mqtt stream source.
  *
  * @param brokerUrl url MqttClient connects to.
  * @param persistence an instance of MqttClientPersistence. By default it is used for storing
@@ -63,26 +86,31 @@ object MQTTStreamConstants {
  * @param topic topic MqttClient subscribes to.
  * @param clientId clientId, this client is assoicated with. Provide the same value to recover
  *                 a stopped client.
- * @param messageParser parsing logic for processing incoming messages from Mqtt Server.
  * @param mqttConnectOptions an instance of MqttConnectOptions for this Source.
  * @param qos the maximum quality of service to subscribe each topic at.Messages published at
  *            a lower quality of service will be received at the published QoS. Messages
  *            published at a higher quality of service will be received using the QoS specified
  *            on the subscribe.
  */
-class MQTTTextStreamSource(options: DataSourceOptions, brokerUrl: String, persistence:
+class MQTTStreamSource(options: DataSourceOptions, brokerUrl: String, persistence:
     MqttClientPersistence, topic: String, clientId: String,
-    messageParser: Array[Byte] => (String, Timestamp),
     mqttConnectOptions: MqttConnectOptions, qos: Int)
   extends MicroBatchReader with Logging {
 
   private var startOffset: OffsetV2 = _
   private var endOffset: OffsetV2 = _
 
+  /* Older than last N messages, will not be checked for redelivery. */
+  val backLog = options.getInt("autopruning.backlog", 500)
 
   private val store = new LocalMessageStore(persistence)
 
-  private val messages = new TrieMap[Long, (String, Timestamp)]
+  private val messages = new TrieMap[Long, MQTTMessage]
+
+  @GuardedBy("this")
+  private val processedMessageIds = new JHashSet[Int](backLog)
+
+  private var maxIdProcessed = 0
 
   @GuardedBy("this")
   private var currentOffset: LongOffset = LongOffset(-1L)
@@ -90,18 +118,19 @@ class MQTTTextStreamSource(options: DataSourceOptions, brokerUrl: String, persis
   @GuardedBy("this")
   private var lastOffsetCommitted: LongOffset = LongOffset(-1L)
 
-
   private var client: MqttClient = _
 
   private def fetchLastProcessedOffset(): LongOffset = {
     Try(store.maxProcessedOffset) match {
-      case Success(x) =>
-        log.info(s"Recovering from last stored offset $x")
+      case Success(x) => // Data processed so far, is not replayed again.
+        log.info(s"Trying to resume from last processed offset $x")
         LongOffset(x)
       case Failure(e) => LongOffset(-1L)
     }
   }
 
+  private[mqtt] def getCurrentOffset = currentOffset
+
   initialize()
   private def initialize(): Unit = {
 
@@ -109,10 +138,15 @@ class MQTTTextStreamSource(options: DataSourceOptions, brokerUrl: String, persis
     val callback = new MqttCallbackExtended() {
 
       override def messageArrived(topic_ : String, message: MqttMessage): Unit = synchronized {
-        val offset = currentOffset.offset + 1L
-        messages.put(offset, messageParser(message.getPayload))
-        currentOffset = LongOffset(offset)
-        log.info(s"Message arrived, $topic_ $message")
+        val mqttMessage = new MQTTMessage(message, topic_)
+        if(!processedMessageIds.contains(mqttMessage.id)) {
+          val offset = currentOffset.offset + 1L
+          messages.put(offset, mqttMessage)
+          currentOffset = LongOffset(offset)
+          log.trace(s"Message arrived, $topic_ $mqttMessage")
+        } else {
+          log.debug(s"Ignored redelivery of $mqttMessage")
+        }
       }
 
       override def deliveryComplete(token: IMqttDeliveryToken): Unit = {
@@ -130,6 +164,9 @@ class MQTTTextStreamSource(options: DataSourceOptions, brokerUrl: String, persis
     client.connect(mqttConnectOptions)
     // It is not possible to initialize offset without `client.connect`
     lastOffsetCommitted = fetchLastProcessedOffset()
+    startOffset = lastOffsetCommitted
+    endOffset = lastOffsetCommitted
+    currentOffset = lastOffsetCommitted
     client.subscribe(topic, qos)
   }
 
@@ -156,18 +193,28 @@ class MQTTTextStreamSource(options: DataSourceOptions, brokerUrl: String, persis
   }
 
   override def createDataReaderFactories(): java.util.List[DataReaderFactory[Row]] = {
-    // Internal buffer only holds the batches after lastOffsetCommitted
-    val rawList: IndexedSeq[(String, Timestamp)] = synchronized {
+    val set = new JHashSet[Int]()
+    val rawList: IndexedSeq[Option[MQTTMessage]] = synchronized {
       val sliceStart = LongOffset.convert(startOffset).get.offset + 1
       val sliceEnd = LongOffset.convert(endOffset).get.offset + 1
-      for ( i <- sliceStart until sliceEnd) yield messages(i)
+      for ( i <- sliceStart until sliceEnd) yield {
+        val m = messages(i)
+        // Only process the messages not already processed.
+        if (!processedMessageIds.contains(m.id) && !set.contains(m.id)) {
+          set.add(m.id)
+          if (maxIdProcessed < m.id) {
+            maxIdProcessed = m.id
+          }
+          Some(m)
+        } else None
+      }
     }
-
+    processedMessageIds.addAll(set)
     val spark = SparkSession.getActiveSession.get
     val numPartitions = spark.sparkContext.defaultParallelism
 
-    val slices = Array.fill(numPartitions)(new ListBuffer[(String, Timestamp)])
-    rawList.zipWithIndex.foreach { case (r, idx) =>
+    val slices = Array.fill(numPartitions)(new ListBuffer[MQTTMessage])
+    rawList.flatten.zipWithIndex.foreach { case (r, idx) =>
       slices(idx % numPartitions).append(r)
     }
 
@@ -183,7 +230,8 @@ class MQTTTextStreamSource(options: DataSourceOptions, brokerUrl: String, persis
           }
 
           override def get(): Row = {
-            Row(slice(currentIdx)._1, slice(currentIdx)._2)
+            Row(slice(currentIdx).id, slice(currentIdx).topic,
+              slice(currentIdx).payload, slice(currentIdx).timestamp)
           }
 
           override def close(): Unit = {}
@@ -208,6 +256,12 @@ class MQTTTextStreamSource(options: DataSourceOptions, brokerUrl: String, persis
       messages.remove(x + 1)
     }
     lastOffsetCommitted = newOffset
+    if (processedMessageIds.size() > 2 * backLog) {
+      // Prune extra messages.
+      val toBePruned = processedMessageIds.asScala.filter(_ < (maxIdProcessed - backLog))
+      toBePruned.foreach(processedMessageIds.remove)
+      log.debug(s"Pruned processedMessageIds and removed ${toBePruned.size} entries.")
+    }
   }
 
   /** Stop this source. */
@@ -226,11 +280,10 @@ class MQTTStreamSourceProvider extends DataSourceV2
 
   override def createMicroBatchReader(schema: Optional[StructType],
       checkpointLocation: String, parameters: DataSourceOptions): MicroBatchReader = {
+    def e(s: String) = new IllegalArgumentException(s)
     if (schema.isPresent) {
-      throw
-        new IllegalArgumentException("The mqtt source does not support a user-specified schema.")
+      throw e("The mqtt source does not support a user-specified schema.")
     }
-    def e(s: String) = new IllegalArgumentException(s)
 
     val brokerUrl = parameters.get("brokerUrl").orElse(parameters.get("path").orElse(null))
 
@@ -242,15 +295,11 @@ class MQTTStreamSourceProvider extends DataSourceV2
       case "memory" => new MemoryPersistence()
       case _ => val localStorage: String = parameters.get("localStorage").orElse("")
         localStorage match {
+          case "" => new MqttDefaultFilePersistence()
           case x => new MqttDefaultFilePersistence(x)
-          case _ => new MqttDefaultFilePersistence()
         }
     }
 
-    val messageParserWithTimeStamp = (x: Array[Byte]) =>
-      (new String(x, Charset.defaultCharset()), Timestamp.valueOf(
-        MQTTStreamConstants.DATE_FORMAT.format(Calendar.getInstance().getTime)))
-
     // if default is subscribe everything, it leads to getting lot unwanted system messages.
     val topic: String = parameters.get("topic").orElse(null)
     if (topic == null) {
@@ -287,8 +336,8 @@ class MQTTStreamSourceProvider extends DataSourceV2
       case _ =>
     }
 
-    new  MQTTTextStreamSource(parameters, brokerUrl, persistence, topic, clientId,
-      messageParserWithTimeStamp, mqttConnectOptions, qos)
+    new  MQTTStreamSource(parameters, brokerUrl, persistence, topic, clientId,
+      mqttConnectOptions, qos)
   }
   override def shortName(): String = "mqtt"
 }
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
index 21e857da..0a2a079c 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
@@ -22,8 +22,7 @@ import java.io.File
 import org.eclipse.paho.client.mqttv3.persist.MqttDefaultFilePersistence
 import org.scalatest.BeforeAndAfter
 
-import org.apache.spark.{SparkConf, SparkFunSuite}
-import org.apache.spark.serializer.JavaSerializer
+import org.apache.spark.SparkFunSuite
 
 import org.apache.bahir.utils.BahirUtils
 
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
index 03ea213c..cd03acb3 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
@@ -18,12 +18,10 @@
 package org.apache.bahir.sql.streaming.mqtt
 
 import java.io.File
-import java.sql.Timestamp
 import java.util.Optional
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
-import scala.concurrent.Future
 
 import org.eclipse.paho.client.mqttv3.MqttException
 import org.scalatest.BeforeAndAfter
@@ -31,10 +29,10 @@ import org.scalatest.BeforeAndAfter
 import org.apache.spark.{SharedSparkContext, SparkFunSuite}
 import org.apache.spark.sql._
 import org.apache.spark.sql.sources.v2.DataSourceOptions
+import org.apache.spark.sql.streaming.{DataStreamReader, StreamingQuery}
 
 import org.apache.bahir.utils.BahirUtils
 
-
 class MQTTStreamSourceSuite extends SparkFunSuite with SharedSparkContext with BeforeAndAfter {
 
   protected var mqttTestUtils: MQTTTestUtils = _
@@ -57,16 +55,42 @@ class MQTTStreamSourceSuite extends SparkFunSuite with SharedSparkContext with B
 
   protected val tmpDir: String = tempDir.getAbsolutePath
 
-  protected def createStreamingDataframe(dir: String = tmpDir): (SQLContext, DataFrame) = {
+  protected def writeStreamResults(sqlContext: SQLContext,
+                                 dataFrame: DataFrame): StreamingQuery = {
+    import sqlContext.implicits._
+    val query: StreamingQuery = dataFrame.selectExpr("CAST(payload AS STRING)").as[String]
+      .writeStream.format("parquet").start(s"$tmpDir/t.parquet")
+    while (!query.isActive) {}
+    query
+  }
+
+  protected def readBackStreamingResults(sqlContext: SQLContext): mutable.Buffer[String] = {
+    import sqlContext.implicits._
+    val asList =
+      sqlContext.read
+        .parquet(s"$tmpDir/t.parquet").as[String]
+        .collectAsList().asScala
+    asList
+  }
+
+  protected def createStreamingDataframe(dir: String = tmpDir,
+      filePersistence: Boolean = false): (SQLContext, DataFrame) = {
 
     val sqlContext: SQLContext = new SQLContext(sc)
 
     sqlContext.setConf("spark.sql.streaming.checkpointLocation", tmpDir)
 
-    val dataFrame: DataFrame =
+    val ds: DataStreamReader =
       sqlContext.readStream.format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
-        .option("topic", "test").option("persistence", "memory").option("clientId", "clientId")
-        .option("QoS", "2").load("tcp://" + mqttTestUtils.brokerUri)
+        .option("topic", "test").option("clientId", "clientId")
+        .option("QoS", "2")
+
+    val dataFrame = if (!filePersistence) {
+      ds.option("persistence", "memory").load("tcp://" + mqttTestUtils.brokerUri)
+    } else {
+      ds.option("persistence", "file").option("localStorage", tmpDir)
+        .load("tcp://" + mqttTestUtils.brokerUri)
+    }
     (sqlContext, dataFrame)
   }
 
@@ -74,31 +98,17 @@ class MQTTStreamSourceSuite extends SparkFunSuite with SharedSparkContext with B
 
 class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
 
-  private def writeStreamResults(sqlContext: SQLContext,
-      dataFrame: DataFrame, waitDuration: Long): Boolean = {
-    import sqlContext.implicits._
-    dataFrame.as[(String, Timestamp)].writeStream.format("parquet").start(s"$tmpDir/t.parquet")
-      .awaitTermination(waitDuration)
-  }
-
-  private def readBackStreamingResults(sqlContext: SQLContext): mutable.Buffer[String] = {
-    import sqlContext.implicits._
-    val asList =
-      sqlContext.read.schema(MQTTStreamConstants.SCHEMA_DEFAULT)
-        .parquet(s"$tmpDir/t.parquet").as[(String, Timestamp)].map(_._1)
-        .collectAsList().asScala
-    asList
-  }
-
   test("basic usage") {
 
     val sendMessage = "MQTT is a message queue."
 
-    mqttTestUtils.publishData("test", sendMessage)
 
     val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
 
-    writeStreamResults(sqlContext, dataFrame, 5000)
+    val query = writeStreamResults(sqlContext, dataFrame)
+    mqttTestUtils.publishData("test", sendMessage)
+    query.processAllAvailable()
+    query.awaitTermination(5000)
 
     val resultBuffer: mutable.Buffer[String] = readBackStreamingResults(sqlContext)
 
@@ -106,21 +116,17 @@ class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
     assert(resultBuffer.head == sendMessage)
   }
 
-  // TODO: reinstate this test after fixing BAHIR-83
-  ignore("Send and receive 50 messages.") {
+  test("Send and receive 50 messages.") {
 
     val sendMessage = "MQTT is a message queue."
 
-    import scala.concurrent.ExecutionContext.Implicits.global
-
     val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
 
-    Future {
-      Thread.sleep(4000)
-      mqttTestUtils.publishData("test", sendMessage, 50)
-    }
+    val q = writeStreamResults(sqlContext, dataFrame)
 
-    // writeStreamResults(sqlContext, dataFrame, 22000)
+    mqttTestUtils.publishData("test", sendMessage, 50)
+    q.processAllAvailable()
+    q.awaitTermination(10000)
 
     val resultBuffer: mutable.Buffer[String] = readBackStreamingResults(sqlContext)
 
@@ -150,45 +156,18 @@ class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
     }
   }
 
-  // TODO: reinstate this test after fixing BAHIR-83
-  ignore("Recovering offset from the last processed offset.") {
-    val sendMessage = "MQTT is a message queue."
-
-    import scala.concurrent.ExecutionContext.Implicits.global
-
-    val (sqlContext: SQLContext, dataFrame: DataFrame) =
-      createStreamingDataframe()
-
-    Future {
-      Thread.sleep(2000)
-      mqttTestUtils.publishData("test", sendMessage, 100)
-    }
-
-    // writeStreamResults(sqlContext, dataFrame, 10000)
-    // On restarting the source with same params, it should begin from the offset - the
-    // previously running stream left off.
-    val provider: MQTTStreamSourceProvider = new MQTTStreamSourceProvider
-    val parameters = new DataSourceOptions(Map("brokerUrl" -> ("tcp://" + mqttTestUtils.brokerUri),
-      "topic" -> "test", "localStorage" -> tmpDir, "clientId" -> "clientId", "QoS" -> "2").asJava)
-    val source = provider.createMicroBatchReader(Optional.empty(), tempDir.toString,
-      parameters)
-    val offset: Long = source.getEndOffset.asInstanceOf[LongOffset].offset
-    source.stop()
-    assert(offset == 100L)
-  }
-
 }
 
 class StressTestMQTTSource extends MQTTStreamSourceSuite {
 
   // Run with -Xmx1024m
-  ignore("Send and receive messages of size 250MB.") {
+  ignore("Send and receive messages of size 100MB.") {
 
     val freeMemory: Long = Runtime.getRuntime.freeMemory()
 
     log.info(s"Available memory before test run is ${freeMemory / (1024 * 1024)}MB.")
 
-    val noOfMsgs = (250 * 1024 * 1024) / (500 * 1024) // 512
+    val noOfMsgs: Int = (100 * 1024 * 1024) / (500 * 1024) // 204
 
     val messageBuilder = new StringBuilder()
     for (i <- 0 until (500 * 1024)) yield messageBuilder.append(((i % 26) + 65).toChar)
@@ -196,22 +175,15 @@ class StressTestMQTTSource extends MQTTStreamSourceSuite {
 
     val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
 
-    import scala.concurrent.ExecutionContext.Implicits.global
-    Future {
-      Thread.sleep(2000)
-      mqttTestUtils.publishData("test", sendMessage, noOfMsgs.toInt)
-    }
-
-    import sqlContext.implicits._
-
-    dataFrame.as[(String, Timestamp)].writeStream
-      .format("parquet")
-      .start(s"$tmpDir/t.parquet")
-      .awaitTermination(25000)
+    val query = writeStreamResults(sqlContext, dataFrame)
+    mqttTestUtils.publishData("test", sendMessage, noOfMsgs / 2)
+    mqttTestUtils.publishData("test", sendMessage, noOfMsgs / 2)
+    query.processAllAvailable()
+    query.awaitTermination(25000)
 
     val messageCount =
-      sqlContext.read.schema(MQTTStreamConstants.SCHEMA_DEFAULT)
-        .parquet(s"$tmpDir/t.parquet").as[(String, Timestamp)].map(_._1)
+      sqlContext.read
+        .parquet(s"$tmpDir/t.parquet")
         .count()
     assert(messageCount == noOfMsgs)
   }
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
index d717d119..f105a631 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
@@ -29,7 +29,6 @@ import org.apache.bahir.utils.Logging
 
 class MQTTTestUtils(tempDir: File, port: Int = 0) extends Logging {
 
-  private val persistenceDir = tempDir.getAbsolutePath
   private val brokerHost = "127.0.0.1"
   private val brokerPort: Int = if (port == 0) findFreePort() else port
 
@@ -81,10 +80,10 @@ class MQTTTestUtils(tempDir: File, port: Int = 0) extends Logging {
         val msgTopic = client.getTopic(topic)
         for (i <- 0 until N) {
           try {
-            Thread.sleep(20)
+            Thread.sleep(10)
             val message = new MqttMessage(data.getBytes())
             message.setQos(2)
-            message.setRetained(true)
+            // message.setId(i) setting id has no effect.
             msgTopic.publish(message)
           } catch {
             case e: MqttException =>

From 4786d3b791c16beb591ed2059af4c4abbb822c70 Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Thu, 10 May 2018 16:58:08 +0530
Subject: [PATCH 04/10] Removed support for detecting redelivery, because Mqtt
 does not give a way to  distinguish a normal message from a re-attempted
 delivery.

On each reattempt seqid of the received message is changed and duplicate is set
to false. This way there is no way to say a message was a normal message or a
redelivery.

This patch also removes the support for fetching last processed offset from disk.
Because, doing so does not serve a purpose, as we do not support complete replay of
messages anyway.

closes #19 and BAHIR-46
---
 .../bahir/sql/streaming/mqtt/LongOffset.scala | 54 ++++++++++++++++++
 .../sql/streaming/mqtt/MQTTStreamSource.scala | 56 +++----------------
 2 files changed, 62 insertions(+), 48 deletions(-)
 create mode 100644 sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/LongOffset.scala

diff --git a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/LongOffset.scala b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/LongOffset.scala
new file mode 100644
index 00000000..345b576d
--- /dev/null
+++ b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/LongOffset.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bahir.sql.streaming.mqtt
+
+import org.apache.spark.sql.execution.streaming.Offset
+import org.apache.spark.sql.execution.streaming.SerializedOffset
+import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2}
+
+/**
+ * A simple offset for sources that produce a single linear stream of data.
+ */
+case class LongOffset(offset: Long) extends OffsetV2 {
+
+  override val json = offset.toString
+
+  def +(increment: Long): LongOffset = new LongOffset(offset + increment)
+  def -(decrement: Long): LongOffset = new LongOffset(offset - decrement)
+}
+
+object LongOffset {
+
+  /**
+   * LongOffset factory from serialized offset.
+   *
+   * @return new LongOffset
+   */
+  def apply(offset: SerializedOffset) : LongOffset = new LongOffset(offset.json.toLong)
+
+  /**
+   * Convert generic Offset to LongOffset if possible.
+   *
+   * @return converted LongOffset
+   */
+  def convert(offset: Offset): Option[LongOffset] = offset match {
+    case lo: LongOffset => Some(lo)
+    case so: SerializedOffset => Some(LongOffset(so))
+    case _ => None
+  }
+}
diff --git a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
index 2db5ca94..e1e26076 100644
--- a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
+++ b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
@@ -20,7 +20,7 @@ package org.apache.bahir.sql.streaming.mqtt
 import java.nio.charset.Charset
 import java.sql.Timestamp
 import java.text.SimpleDateFormat
-import java.util.{Calendar, HashSet => JHashSet, Optional}
+import java.util.{Calendar, Optional}
 import javax.annotation.concurrent.GuardedBy
 
 import scala.collection.JavaConverters._
@@ -107,11 +107,6 @@ class MQTTStreamSource(options: DataSourceOptions, brokerUrl: String, persistenc
 
   private val messages = new TrieMap[Long, MQTTMessage]
 
-  @GuardedBy("this")
-  private val processedMessageIds = new JHashSet[Int](backLog)
-
-  private var maxIdProcessed = 0
-
   @GuardedBy("this")
   private var currentOffset: LongOffset = LongOffset(-1L)
 
@@ -120,15 +115,6 @@ class MQTTStreamSource(options: DataSourceOptions, brokerUrl: String, persistenc
 
   private var client: MqttClient = _
 
-  private def fetchLastProcessedOffset(): LongOffset = {
-    Try(store.maxProcessedOffset) match {
-      case Success(x) => // Data processed so far, is not replayed again.
-        log.info(s"Trying to resume from last processed offset $x")
-        LongOffset(x)
-      case Failure(e) => LongOffset(-1L)
-    }
-  }
-
   private[mqtt] def getCurrentOffset = currentOffset
 
   initialize()
@@ -139,14 +125,10 @@ class MQTTStreamSource(options: DataSourceOptions, brokerUrl: String, persistenc
 
       override def messageArrived(topic_ : String, message: MqttMessage): Unit = synchronized {
         val mqttMessage = new MQTTMessage(message, topic_)
-        if(!processedMessageIds.contains(mqttMessage.id)) {
-          val offset = currentOffset.offset + 1L
-          messages.put(offset, mqttMessage)
-          currentOffset = LongOffset(offset)
-          log.trace(s"Message arrived, $topic_ $mqttMessage")
-        } else {
-          log.debug(s"Ignored redelivery of $mqttMessage")
-        }
+        val offset = currentOffset.offset + 1L
+        messages.put(offset, mqttMessage)
+        currentOffset = LongOffset(offset)
+        log.trace(s"Message arrived, $topic_ $mqttMessage")
       }
 
       override def deliveryComplete(token: IMqttDeliveryToken): Unit = {
@@ -163,10 +145,6 @@ class MQTTStreamSource(options: DataSourceOptions, brokerUrl: String, persistenc
     client.setCallback(callback)
     client.connect(mqttConnectOptions)
     // It is not possible to initialize offset without `client.connect`
-    lastOffsetCommitted = fetchLastProcessedOffset()
-    startOffset = lastOffsetCommitted
-    endOffset = lastOffsetCommitted
-    currentOffset = lastOffsetCommitted
     client.subscribe(topic, qos)
   }
 
@@ -193,28 +171,16 @@ class MQTTStreamSource(options: DataSourceOptions, brokerUrl: String, persistenc
   }
 
   override def createDataReaderFactories(): java.util.List[DataReaderFactory[Row]] = {
-    val set = new JHashSet[Int]()
-    val rawList: IndexedSeq[Option[MQTTMessage]] = synchronized {
+    val rawList: IndexedSeq[MQTTMessage] = synchronized {
       val sliceStart = LongOffset.convert(startOffset).get.offset + 1
       val sliceEnd = LongOffset.convert(endOffset).get.offset + 1
-      for ( i <- sliceStart until sliceEnd) yield {
-        val m = messages(i)
-        // Only process the messages not already processed.
-        if (!processedMessageIds.contains(m.id) && !set.contains(m.id)) {
-          set.add(m.id)
-          if (maxIdProcessed < m.id) {
-            maxIdProcessed = m.id
-          }
-          Some(m)
-        } else None
-      }
+      for (i <- sliceStart until sliceEnd) yield messages(i)
     }
-    processedMessageIds.addAll(set)
     val spark = SparkSession.getActiveSession.get
     val numPartitions = spark.sparkContext.defaultParallelism
 
     val slices = Array.fill(numPartitions)(new ListBuffer[MQTTMessage])
-    rawList.flatten.zipWithIndex.foreach { case (r, idx) =>
+    rawList.zipWithIndex.foreach { case (r, idx) =>
       slices(idx % numPartitions).append(r)
     }
 
@@ -256,12 +222,6 @@ class MQTTStreamSource(options: DataSourceOptions, brokerUrl: String, persistenc
       messages.remove(x + 1)
     }
     lastOffsetCommitted = newOffset
-    if (processedMessageIds.size() > 2 * backLog) {
-      // Prune extra messages.
-      val toBePruned = processedMessageIds.asScala.filter(_ < (maxIdProcessed - backLog))
-      toBePruned.foreach(processedMessageIds.remove)
-      log.debug(s"Pruned processedMessageIds and removed ${toBePruned.size} entries.")
-    }
   }
 
   /** Stop this source. */

From c859b0c3c188a6313b9e098af5d8104404a31b77 Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Fri, 11 May 2018 11:51:32 +0530
Subject: [PATCH 05/10] Until BAHIR-166 is fixed, disable unusable akka sql
 streaming module.

---
 pom.xml                                                        | 2 +-
 .../bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala       | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pom.xml b/pom.xml
index e00d9be8..97c7e2e4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -77,7 +77,7 @@
   <modules>
     <module>sql-cloudant</module>
     <module>streaming-akka</module>
-    <module>sql-streaming-akka</module>
+    <!-- <module>sql-streaming-akka</module> Disabling akka sql module, until it is updated to run with datasource v2 API. -->
     <module>streaming-mqtt</module>
     <module>sql-streaming-mqtt</module>
     <module>streaming-twitter</module>
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
index cd03acb3..5482c32b 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
@@ -176,8 +176,7 @@ class StressTestMQTTSource extends MQTTStreamSourceSuite {
     val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
 
     val query = writeStreamResults(sqlContext, dataFrame)
-    mqttTestUtils.publishData("test", sendMessage, noOfMsgs / 2)
-    mqttTestUtils.publishData("test", sendMessage, noOfMsgs / 2)
+    mqttTestUtils.publishData("test", sendMessage, noOfMsgs )
     query.processAllAvailable()
     query.awaitTermination(25000)
 

From 7adc253c44f117f827fe995fd61ddbbe61436c52 Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Fri, 11 May 2018 12:16:59 +0530
Subject: [PATCH 06/10] BAHIR-83, does not seem to bother us anymore.

---
 bin/test-BAHIR-83.sh | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100755 bin/test-BAHIR-83.sh

diff --git a/bin/test-BAHIR-83.sh b/bin/test-BAHIR-83.sh
new file mode 100755
index 00000000..7a1ffdc9
--- /dev/null
+++ b/bin/test-BAHIR-83.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -o pipefail
+
+for i in `seq 100` ; do
+  mvn scalatest:test -pl sql-streaming-mqtt -q \
+    -Dsuites='*.BasicMQTTSourceSuite @ Recovering offset from the last processed offset.' | \
+    grep -q "TEST FAILED" && echo "$i: failed"
+done

From 3cdb10236272298cd35d80bdd467520756df14ed Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Fri, 11 May 2018 13:35:45 +0530
Subject: [PATCH 07/10] Updated documents to reflect the change is usage of the
 connector. Also added a best practices guide.

---
 sql-streaming-mqtt/README.md                  | 45 ++++++++++++++++++-
 .../mqtt/JavaMQTTStreamWordCount.java         |  2 +-
 .../mqtt/MQTTStreamSourceSuite.scala          |  3 +-
 3 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/sql-streaming-mqtt/README.md b/sql-streaming-mqtt/README.md
index 2cfbe0f8..d3645849 100644
--- a/sql-streaming-mqtt/README.md
+++ b/sql-streaming-mqtt/README.md
@@ -68,7 +68,7 @@ An example, for scala API to count words from incoming message stream.
     val lines = spark.readStream
       .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
       .option("topic", topic)
-      .load(brokerUrl).as[(String, Timestamp)]
+      .load(brokerUrl).selectExpr("CAST(payload AS STRING)").as[String]
 
     // Split the lines into words
     val words = lines.map(_._1).flatMap(_.split(" "))
@@ -95,7 +95,8 @@ An example, for Java API to count words from incoming message stream.
             .readStream()
             .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
             .option("topic", topic)
-            .load(brokerUrl).select("value").as(Encoders.STRING());
+            .load(brokerUrl)
+            .selectExpr("CAST(payload AS STRING)").as(Encoders.STRING());
 
     // Split the lines into words
     Dataset<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
@@ -118,3 +119,43 @@ An example, for Java API to count words from incoming message stream.
 
 Please see `JavaMQTTStreamWordCount.java` for full example.
 
+## Best Practices.
+
+1. > *MQTT is a machine-to-machine (M2M)/"Internet of Things" connectivity protocol. It was designed as an extremely lightweight publish/subscribe messaging transport.*
+
+The design of Mqtt and the purpose it serves goes well together, but often in an application it is of outmost value to have reliablity. Since mqtt is not a distributed message queue and thus does not offer the highest level of reliability features. It should be redirected via a kafka message queue to take advantage of a distributed message queue. Infact, using a kafka message queue offers a lot of possiblities including a single kafka topic subscribed to several mqtt sources and even a single mqtt stream publishing to multiple kafka topics. Kafka is a reliable and scalable message queue.
+
+2. Often the message payload is not of the default character encoding or contains binary that needs to be parsed using a particular parser. In such cases, spark mqtt payload should be processed using the external parser. For example:
+
+ * Scala API example:
+```scala
+    // Create DataFrame representing the stream of input lines from connection to mqtt server
+    val lines = spark.readStream
+      .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
+      .option("topic", topic)
+      .load(brokerUrl).select("payload").as[Array[Byte]].map(externalParser(_))
+```
+
+ * Java API example
+```java
+        // Create DataFrame representing the stream of input lines from connection to mqtt server
+        Dataset<byte[]> lines = spark
+                .readStream()
+                .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
+                .option("topic", topic)
+                .load(brokerUrl).selectExpr("CAST(payload AS BINARY)").as(Encoders.BINARY());
+
+        // Split the lines into words
+        Dataset<String> words = lines.map(new MapFunction<byte[], String>() {
+            @Override
+            public String call(byte[] bytes) throws Exception {
+                return new String(bytes); // Plug in external parser here.
+            }
+        }, Encoders.STRING()).flatMap(new FlatMapFunction<String, String>() {
+            @Override
+            public Iterator<String> call(String x) {
+                return Arrays.asList(x.split(" ")).iterator();
+            }
+        }, Encoders.STRING());
+
+```
diff --git a/sql-streaming-mqtt/examples/src/main/java/org/apache/bahir/examples/sql/streaming/mqtt/JavaMQTTStreamWordCount.java b/sql-streaming-mqtt/examples/src/main/java/org/apache/bahir/examples/sql/streaming/mqtt/JavaMQTTStreamWordCount.java
index 519d9a03..4e87c990 100644
--- a/sql-streaming-mqtt/examples/src/main/java/org/apache/bahir/examples/sql/streaming/mqtt/JavaMQTTStreamWordCount.java
+++ b/sql-streaming-mqtt/examples/src/main/java/org/apache/bahir/examples/sql/streaming/mqtt/JavaMQTTStreamWordCount.java
@@ -71,7 +71,7 @@ public static void main(String[] args) throws Exception {
                 .readStream()
                 .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
                 .option("topic", topic)
-                .load(brokerUrl).select("value").as(Encoders.STRING());
+                .load(brokerUrl).selectExpr("CAST(payload AS STRING)").as(Encoders.STRING());
 
         // Split the lines into words
         Dataset<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
index 5482c32b..61ce63d3 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
@@ -55,8 +55,7 @@ class MQTTStreamSourceSuite extends SparkFunSuite with SharedSparkContext with B
 
   protected val tmpDir: String = tempDir.getAbsolutePath
 
-  protected def writeStreamResults(sqlContext: SQLContext,
-                                 dataFrame: DataFrame): StreamingQuery = {
+  protected def writeStreamResults(sqlContext: SQLContext, dataFrame: DataFrame): StreamingQuery = {
     import sqlContext.implicits._
     val query: StreamingQuery = dataFrame.selectExpr("CAST(payload AS STRING)").as[String]
       .writeStream.format("parquet").start(s"$tmpDir/t.parquet")

From 201ff5a37f8e40f9a7f70c60189f91374ff91555 Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Fri, 11 May 2018 17:01:48 +0530
Subject: [PATCH 08/10] Spell checked, and added another best practice
 scenario.

---
 sql-streaming-mqtt/README.md | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/sql-streaming-mqtt/README.md b/sql-streaming-mqtt/README.md
index d3645849..0b9d7c72 100644
--- a/sql-streaming-mqtt/README.md
+++ b/sql-streaming-mqtt/README.md
@@ -121,15 +121,17 @@ Please see `JavaMQTTStreamWordCount.java` for full example.
 
 ## Best Practices.
 
-1. > *MQTT is a machine-to-machine (M2M)/"Internet of Things" connectivity protocol. It was designed as an extremely lightweight publish/subscribe messaging transport.*
+1. Turn Mqtt into a more reliable messaging service. 
 
-The design of Mqtt and the purpose it serves goes well together, but often in an application it is of outmost value to have reliablity. Since mqtt is not a distributed message queue and thus does not offer the highest level of reliability features. It should be redirected via a kafka message queue to take advantage of a distributed message queue. Infact, using a kafka message queue offers a lot of possiblities including a single kafka topic subscribed to several mqtt sources and even a single mqtt stream publishing to multiple kafka topics. Kafka is a reliable and scalable message queue.
+> *MQTT is a machine-to-machine (M2M)/"Internet of Things" connectivity protocol. It was designed as an extremely lightweight publish/subscribe messaging transport.*
+
+The design of Mqtt and the purpose it serves goes well together, but often in an application it is of utmost value to have reliability. Since mqtt is not a distributed message queue and thus does not offer the highest level of reliability features. It should be redirected via a kafka message queue to take advantage of a distributed message queue. In fact, using a kafka message queue offers a lot of possibilities including a single kafka topic subscribed to several mqtt sources and even a single mqtt stream publishing to multiple kafka topics. Kafka is a reliable and scalable message queue.
 
 2. Often the message payload is not of the default character encoding or contains binary that needs to be parsed using a particular parser. In such cases, spark mqtt payload should be processed using the external parser. For example:
 
  * Scala API example:
 ```scala
-    // Create DataFrame representing the stream of input lines from connection to mqtt server
+    // Create DataFrame representing the stream of binary messages
     val lines = spark.readStream
       .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
       .option("topic", topic)
@@ -138,7 +140,7 @@ The design of Mqtt and the purpose it serves goes well together, but often in an
 
  * Java API example
 ```java
-        // Create DataFrame representing the stream of input lines from connection to mqtt server
+        // Create DataFrame representing the stream of binary messages
         Dataset<byte[]> lines = spark
                 .readStream()
                 .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
@@ -159,3 +161,10 @@ The design of Mqtt and the purpose it serves goes well together, but often in an
         }, Encoders.STRING());
 
 ```
+
+3. What is the solution for a situation when there are a large number of varied mqtt sources, each with different schema and throughput characteristics.
+
+This is an anti-pattern for spark structured streaming, which is designed to process a single schema, high volume streaming feed. Generally, one would create a lot of streaming pipelines to solve this problem. This would either require a very sophisticated scheduling setup or will waste a lot of resources, as it is not certain which stream is using more amount of data.
+
+The general solution is both less optimum and is more cumbersome to operate, with multiple moving parts incurs a high maintenance overall. As an alternative, in this situation, one can setup a single topic kafka-spark stream, where message from each of the varied stream contains a unique tag separating one from other streams. This way at the processing end, one can distinguish the message from one another and apply the right kind of decoding and processing. Similarly while storing, each message can be distinguished from others by a tag that distinguishes.
+

From b284635e0043706fd346b02f232670fbfb9e56c3 Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Fri, 11 May 2018 17:02:07 +0530
Subject: [PATCH 09/10] fixed error reporting.

---
 .../org/apache/bahir/sql/streaming/mqtt/MessageStore.scala  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
index 54b09cd7..d7d26572 100644
--- a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
+++ b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
@@ -63,14 +63,14 @@ trait Serializer {
   def serialize[T](x: T): Array[Byte]
 }
 
-class JavaSerializer extends Serializer {
+class JavaSerializer extends Serializer with Logging {
 
   override def deserialize[T](x: Array[Byte]): T = {
     val bis = new ByteArrayInputStream(x)
     val in = new ObjectInputStream(bis)
     val obj = if (in != null) {
       val o = in.readObject()
-      Try(in.close())
+      Try(in.close()).recover { case t: Throwable => log.warn("failed to close stream", t) }
       o
     } else {
       null
@@ -85,7 +85,7 @@ class JavaSerializer extends Serializer {
     out.flush()
     if (bos != null) {
       val bytes: Array[Byte] = bos.toByteArray
-      Try(bos.close())
+      Try(bos.close()).recover { case t: Throwable => log.warn("failed to close stream", t) }
       bytes
     } else {
       null

From d9a9335bb0c46b48f77ccb173c8a5b9ad23c1ad3 Mon Sep 17 00:00:00 2001
From: Prashant Sharma <prashsh1@in.ibm.com>
Date: Mon, 14 May 2018 16:35:10 +0530
Subject: [PATCH 10/10] Testing BAHIR-83 again.

---
 bin/test-BAHIR-83.sh                               |  3 +--
 sql-streaming-mqtt/README.md                       |  6 ++++--
 .../sql/streaming/mqtt/MQTTStreamSource.scala      |  9 +++++----
 .../sql/streaming/mqtt/MQTTStreamSourceSuite.scala | 14 ++++++++------
 .../bahir/sql/streaming/mqtt/MQTTTestUtils.scala   |  2 +-
 5 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/bin/test-BAHIR-83.sh b/bin/test-BAHIR-83.sh
index 7a1ffdc9..659dd8c8 100755
--- a/bin/test-BAHIR-83.sh
+++ b/bin/test-BAHIR-83.sh
@@ -19,7 +19,6 @@
 set -o pipefail
 
 for i in `seq 100` ; do
-  mvn scalatest:test -pl sql-streaming-mqtt -q \
-    -Dsuites='*.BasicMQTTSourceSuite @ Recovering offset from the last processed offset.' | \
+  mvn scalatest:test -pl sql-streaming-mqtt -q -Dsuites='*.BasicMQTTSourceSuite' | \
     grep -q "TEST FAILED" && echo "$i: failed"
 done
diff --git a/sql-streaming-mqtt/README.md b/sql-streaming-mqtt/README.md
index 0b9d7c72..b7f06021 100644
--- a/sql-streaming-mqtt/README.md
+++ b/sql-streaming-mqtt/README.md
@@ -59,7 +59,9 @@ This source uses [Eclipse Paho Java Client](https://eclipse.org/paho/clients/jav
  * `connectionTimeout` Sets the connection timeout, a value of 0 is interpretted as wait until client connects. See `MqttConnectOptions.setConnectionTimeout` for more information.
  * `keepAlive` Same as `MqttConnectOptions.setKeepAliveInterval`.
  * `mqttVersion` Same as `MqttConnectOptions.setMqttVersion`.
-
+ * `maxInflight` Same as `MqttConnectOptions.setMaxInflight`
+ * `autoReconnect` Same as `MqttConnectOptions.setAutomaticReconnect`
+ 
 ### Scala API
 
 An example, for scala API to count words from incoming message stream. 
@@ -164,7 +166,7 @@ The design of Mqtt and the purpose it serves goes well together, but often in an
 
 3. What is the solution for a situation when there are a large number of varied mqtt sources, each with different schema and throughput characteristics.
 
-This is an anti-pattern for spark structured streaming, which is designed to process a single schema, high volume streaming feed. Generally, one would create a lot of streaming pipelines to solve this problem. This would either require a very sophisticated scheduling setup or will waste a lot of resources, as it is not certain which stream is using more amount of data.
+Generally, one would create a lot of streaming pipelines to solve this problem. This would either require a very sophisticated scheduling setup or will waste a lot of resources, as it is not certain which stream is using more amount of data.
 
 The general solution is both less optimum and is more cumbersome to operate, with multiple moving parts incurs a high maintenance overall. As an alternative, in this situation, one can setup a single topic kafka-spark stream, where message from each of the varied stream contains a unique tag separating one from other streams. This way at the processing end, one can distinguish the message from one another and apply the right kind of decoding and processing. Similarly while storing, each message can be distinguished from others by a tag that distinguishes.
 
diff --git a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
index e1e26076..2f75ee22 100644
--- a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
+++ b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
@@ -27,7 +27,6 @@ import scala.collection.JavaConverters._
 import scala.collection.concurrent.TrieMap
 import scala.collection.immutable.IndexedSeq
 import scala.collection.mutable.ListBuffer
-import scala.util.{Failure, Success, Try}
 
 import org.eclipse.paho.client.mqttv3._
 import org.eclipse.paho.client.mqttv3.persist.{MemoryPersistence, MqttDefaultFilePersistence}
@@ -280,15 +279,17 @@ class MQTTStreamSourceProvider extends DataSourceV2
       .KEEP_ALIVE_INTERVAL_DEFAULT.toString).toInt
     val mqttVersion: Int = parameters.get("mqttVersion").orElse(MqttConnectOptions
       .MQTT_VERSION_DEFAULT.toString).toInt
-    val cleanSession: Boolean = parameters.get("cleanSession").orElse("false").toBoolean
+    val cleanSession: Boolean = parameters.get("cleanSession").orElse("true").toBoolean
     val qos: Int = parameters.get("QoS").orElse("1").toInt
-
+    val autoReconnect: Boolean = parameters.get("autoReconnect").orElse("false").toBoolean
+    val maxInflight: Int = parameters.get("maxInflight").orElse("60").toInt
     val mqttConnectOptions: MqttConnectOptions = new MqttConnectOptions()
-    mqttConnectOptions.setAutomaticReconnect(true)
+    mqttConnectOptions.setAutomaticReconnect(autoReconnect)
     mqttConnectOptions.setCleanSession(cleanSession)
     mqttConnectOptions.setConnectionTimeout(connectionTimeout)
     mqttConnectOptions.setKeepAliveInterval(keepAlive)
     mqttConnectOptions.setMqttVersion(mqttVersion)
+    mqttConnectOptions.setMaxInflight(maxInflight)
     (username, password) match {
       case (u: String, p: String) if u != null && p != null =>
         mqttConnectOptions.setUserName(u)
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
index 61ce63d3..2ce72da9 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
@@ -59,7 +59,9 @@ class MQTTStreamSourceSuite extends SparkFunSuite with SharedSparkContext with B
     import sqlContext.implicits._
     val query: StreamingQuery = dataFrame.selectExpr("CAST(payload AS STRING)").as[String]
       .writeStream.format("parquet").start(s"$tmpDir/t.parquet")
-    while (!query.isActive) {}
+    while (!query.status.isTriggerActive) {
+      Thread.sleep(20)
+    }
     query
   }
 
@@ -81,8 +83,9 @@ class MQTTStreamSourceSuite extends SparkFunSuite with SharedSparkContext with B
 
     val ds: DataStreamReader =
       sqlContext.readStream.format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
-        .option("topic", "test").option("clientId", "clientId")
-        .option("QoS", "2")
+        .option("topic", "test").option("clientId", "clientId").option("connectionTimeout", "120")
+        .option("keepAlive", "1200").option("maxInflight", "120").option("autoReconnect", "false")
+        .option("cleanSession", "true").option("QoS", "2")
 
     val dataFrame = if (!filePersistence) {
       ds.option("persistence", "memory").load("tcp://" + mqttTestUtils.brokerUri)
@@ -101,13 +104,12 @@ class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
 
     val sendMessage = "MQTT is a message queue."
 
-
     val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
 
     val query = writeStreamResults(sqlContext, dataFrame)
     mqttTestUtils.publishData("test", sendMessage)
     query.processAllAvailable()
-    query.awaitTermination(5000)
+    query.awaitTermination(10000)
 
     val resultBuffer: mutable.Buffer[String] = readBackStreamingResults(sqlContext)
 
@@ -160,7 +162,7 @@ class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
 class StressTestMQTTSource extends MQTTStreamSourceSuite {
 
   // Run with -Xmx1024m
-  ignore("Send and receive messages of size 100MB.") {
+  test("Send and receive messages of size 100MB.") {
 
     val freeMemory: Long = Runtime.getRuntime.freeMemory()
 
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
index f105a631..817ec9a4 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
@@ -80,7 +80,7 @@ class MQTTTestUtils(tempDir: File, port: Int = 0) extends Logging {
         val msgTopic = client.getTopic(topic)
         for (i <- 0 until N) {
           try {
-            Thread.sleep(10)
+            Thread.sleep(20)
             val message = new MqttMessage(data.getBytes())
             message.setQos(2)
             // message.setId(i) setting id has no effect.