From 8c2022755c08828edea3ef61f35a8490d1d6af24 Mon Sep 17 00:00:00 2001
From: zhangxinyu1 <342689740@qq.com>
Date: Tue, 11 Oct 2016 17:15:09 +0800
Subject: [PATCH 1/3] add KafkaForeachWriter for structured streaming. It can
 be used with ForeachSink

---
 external/kafka-0-8/pom.xml                    |  6 ++
 .../apache/spark/sql/KafkaForeachWriter.scala | 88 +++++++++++++++++++
 2 files changed, 94 insertions(+)
 create mode 100644 external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala

diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml
index 91ccd4a927e98..4ea52e2a4809c 100644
--- a/external/kafka-0-8/pom.xml
+++ b/external/kafka-0-8/pom.xml
@@ -39,6 +39,12 @@
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala
new file mode 100644
index 0000000000000..89be76757643e
--- /dev/null
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.util.Properties
+
+import kafka.producer.OldProducer
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.types.{StringType, StructType}
+
+class KafkaForeachWriter[T](
+    kafkaParams: Properties,
+    topics: Set[String],
+    schema: StructType) extends ForeachWriter[T] with Logging{
+  private var producer : OldProducer = null
+  override def open(partitionId: Long, version: Long): Boolean = {
+    verifySchema(schema)
+    producer = new OldProducer(kafkaParams)
+    if (producer == null) return false
+    logInfo(s"Producer created. PartitionId: ${partitionId}, version: ${version}")
+    true
+  }
+
+  override def process(value: T): Unit = {
+    logDebug("Starting to send message")
+    val data = value.asInstanceOf[Row]
+    verifySchema(data.schema)
+    val keyIsNull = if (schema.size == 1) true else false
+    if (keyIsNull) {
+      logDebug(s"Send message to kafka. Message Value: ${data.getString(0)}")
+      topics.map(producer.send(_, null, data.getString(0).getBytes()))
+    } else {
+      val messageKey: String = data.getAs[String]("key")
+      val messageValue: String = data.getAs[String]("value")
+      logDebug(s"Send message to kafka. Message Value: (${messageKey}, ${messageValue})")
+      topics.map(producer.send(_, messageKey.getBytes(), messageValue.getBytes()))
+    }
+  }
+
+  override def close(errorOrNull: Throwable): Unit = {
+    if (errorOrNull != null) logInfo(errorOrNull.getMessage)
+    logInfo("Close producer.")
+    producer.close()
+  }
+
+  private def verifySchema(schema: StructType): Unit = {
+    if (schema.size == 1) {
+      if (schema(0).dataType != StringType) {
+        throw new AnalysisException(
+          s"Producer sends messages Failed! KafkaForeachWriter supports only StringType value, " +
+            s"and you have ${schema(0).dataType} value.")
+      }
+    } else if (schema.size == 2) {
+      if (schema(0).dataType != StringType || schema(1).dataType != StringType) {
+        throw new AnalysisException(
+          s"Producer sends messages Failed! KafkaForeachWriter supports only StringType value, " +
+            s"and you have ${schema(0).dataType} and ${schema(1).dataType} value.")
+      }
+      if (schema.fieldNames.contains("key") && schema.fieldNames.contains("value")) {
+      } else {
+        throw new AnalysisException(
+          s"""Producer sends messages Failed! If there are two columns in Row, fieldsNames
+             should be "key" and "value" .
+           """)
+      }
+    } else {
+      throw new AnalysisException(
+        s"Producer sends messages Failed! KafkaForeachWriter supports only one or two columns, " +
+          s"and you have ${schema.size} columns.")
+    }
+  }
+}
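
[Editor's note] As a usage illustration only: the commit message says the writer "can be used with ForeachSink", so a minimal sketch of wiring it into a streaming query follows. The SparkSession `spark`, the socket host/port, the broker address `localhost:9092`, and the topic name `output-topic` are placeholders, not part of the patch.

    import java.util.Properties

    import org.apache.spark.sql.{KafkaForeachWriter, Row}

    val producerProps = new Properties()
    // Old (0.8) producer API: brokers are configured through metadata.broker.list.
    producerProps.setProperty("metadata.broker.list", "localhost:9092")

    // The socket source yields a single StringType column named "value",
    // so the writer treats each row as the message value and leaves the key null.
    val lines = spark.readStream
      .format("socket")
      .option("host", "localhost")
      .option("port", 9999)
      .load()

    val query = lines.writeStream
      .outputMode("append")
      .foreach(new KafkaForeachWriter[Row](producerProps, Set("output-topic"), lines.schema))
      .start()
    query.awaitTermination()

The single StringType column matches the writer's no-key path shown in process() above.
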
From 1ad774919598aca5cf126c2c6e19fa618eaaa5d1 Mon Sep 17 00:00:00 2001
From: zhangxinyu1 <342689740@qq.com>
Date: Fri, 14 Oct 2016 18:29:33 +0800
Subject: [PATCH 2/3] Add KafkaForeachWriter to write result to kafka in
 structured streaming It can be used with ForeachSink

---
 external/kafka-0-8/pom.xml                    |   7 +
 .../apache/spark/sql/KafkaForeachWriter.scala |  11 +-
 .../streaming/kafka/KafkaTestUtils.scala      |   2 +-
 .../spark/sql/KafkaForeachWriterSuite.scala   | 134 ++++++++++++++++++
 4 files changed, 150 insertions(+), 4 deletions(-)
 create mode 100644 external/kafka-0-8/src/test/scala/org/apache/spark/sql/KafkaForeachWriterSuite.scala

diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml
index 4ea52e2a4809c..2a6fee5dff942 100644
--- a/external/kafka-0-8/pom.xml
+++ b/external/kafka-0-8/pom.xml
@@ -45,6 +45,13 @@
       <artifactId>spark-sql_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala
index 89be76757643e..13b674448db68 100644
--- a/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala
@@ -20,10 +20,15 @@ package org.apache.spark.sql
 import java.util.Properties
 
 import kafka.producer.OldProducer
-
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.types.{StringType, StructType}
-
+/**
+ * A ForeachWriter that outputs streaming query results to kafka clusters.
+ * The streaming query results must be one or two [[StringType]] columns. When the output
+ * is a single String column, this column will be considered as value and
+ * key will be null by default. If output has two [[StringType]] columns, the fieldsName
+ * of columns should be "key" and "value".
+ */
 class KafkaForeachWriter[T](
     kafkaParams: Properties,
     topics: Set[String],
     schema: StructType) extends ForeachWriter[T] with Logging{
@@ -39,7 +44,7 @@ class KafkaForeachWriter[T](
 
   override def process(value: T): Unit = {
     logDebug("Starting to send message")
-    val data = value.asInstanceOf[Row]
+    val data : Row = value.asInstanceOf[Row]
     verifySchema(data.schema)
     val keyIsNull = if (schema.size == 1) true else false
     if (keyIsNull) {
diff --git a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala
index 03c9ca7524e5d..5d894cf0695cc 100644
--- a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala
@@ -48,7 +48,7 @@ import org.apache.spark.util.Utils
  *
  * The reason to put Kafka test utility class in src is to test Python related Kafka APIs.
  */
-private[kafka] class KafkaTestUtils extends Logging {
+private[spark] class KafkaTestUtils extends Logging {
 
   // Zookeeper related configurations
   private val zkHost = "localhost"
diff --git a/external/kafka-0-8/src/test/scala/org/apache/spark/sql/KafkaForeachWriterSuite.scala b/external/kafka-0-8/src/test/scala/org/apache/spark/sql/KafkaForeachWriterSuite.scala
new file mode 100644
index 0000000000000..b93cd9b557b85
--- /dev/null
+++ b/external/kafka-0-8/src/test/scala/org/apache/spark/sql/KafkaForeachWriterSuite.scala
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.util.Properties
+import scala.collection.mutable.ArrayBuffer
+
+import kafka.consumer.{ConsumerConfig, Consumer}
+import org.scalatest.concurrent.Eventually
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfter}
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.execution.streaming.MemoryStream
+import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.streaming.kafka.KafkaTestUtils
+
+class KafkaForeachWriterSuite
+  extends SparkFunSuite
+  with BeforeAndAfter
+  with BeforeAndAfterAll
+  with Eventually
+  with SharedSQLContext{
+  import testImplicits._
+
+  private var kafkaTestUtils: KafkaTestUtils = _
+
+  override def beforeAll {
+    super.beforeAll
+    kafkaTestUtils = new KafkaTestUtils
+    kafkaTestUtils.setup()
+  }
+
+  override def afterAll {
+    if (kafkaTestUtils != null) {
+      kafkaTestUtils.teardown()
+      kafkaTestUtils = null
+    }
+    super.afterAll
+  }
+
+  test("Basic usage: Write kafka without key"){
+    val topic = "testWithoutKey"
+    val topics = Set(topic)
+    kafkaTestUtils.createTopic(topic)
+    val topicCountMap = Map(topic -> 1)
+
+    //create streaming query, write results to kafka
+    val input = MemoryStream[String]
+    val producerProps : Properties = new Properties()
+    producerProps.setProperty("metadata.broker.list", kafkaTestUtils.brokerAddress)
+    val query = input.toDF().writeStream
+      .outputMode("append")
+      .foreach(new KafkaForeachWriter[Row](producerProps, topics, input.toDF().schema))
+      .start()
+    val source = Array("A","B","C","E","F","G")
+    input.addData(source)
+    query.processAllAvailable()
+
+    //create consumer, and consume data in kafka
+    val consumerProps: Properties = new Properties()
+    consumerProps.setProperty("zookeeper.connect", kafkaTestUtils.zkAddress)
+    consumerProps.setProperty("auto.offset.reset", "smallest")
+    consumerProps.setProperty("group.id", "spark")
+    val connector = Consumer.create(new ConsumerConfig(consumerProps))
+    val stream = connector.createMessageStreams(topicCountMap)
+    val result = ArrayBuffer[String]()
+    for (message <- stream.get(topic)) {
+      message.map(kafkaStream => {
+        val it = kafkaStream.iterator()
+        while(result.size < source.length) {
+          result += new String(it.next.message())
+        }
+      })
+    }
+    assert(result.synchronized { source === result.sorted.toArray })
+  }
+
+
+  test("Basic usage: Write kafka with key"){
+    val topic = "testWithKey"
+    kafkaTestUtils.createTopic(topic)
+    val topics = Set(topic)
+    val topicCountMap = Map(topic -> 1)
+
+    //create streaming query, write results to kafka
+    val input = MemoryStream[String]
+    val producerProps : Properties = new Properties()
+    producerProps.setProperty("metadata.broker.list", kafkaTestUtils.brokerAddress)
+    val data = input.toDS()
+      .map( word => (word,word))
+      .as[(String,String)].toDF("key","value")
+    val query = data.writeStream
+      .outputMode("append")
+      .foreach(new KafkaForeachWriter[Row](producerProps, topics, data.schema))
+      .start()
+    val source = Array("A","B","C")
+    input.addData(source)
+    query.processAllAvailable()
+
+    //create consumer, and consume data in kafka
+    val consumerProps: Properties = new Properties()
+    consumerProps.setProperty("zookeeper.connect", kafkaTestUtils.zkAddress)
+    consumerProps.setProperty("auto.offset.reset", "smallest")
+    consumerProps.setProperty("group.id", "spark")
+    val connector = Consumer.create(new ConsumerConfig(consumerProps))
+    val stream = connector.createMessageStreams(topicCountMap)
+    val result = ArrayBuffer[String]()
+    for (message <- stream.get(topic)) {
+      message.map(kafkaStream => {
+        val it = kafkaStream.iterator()
+        while(result.size < source.length) {
+          result += new String(it.next.message())
+        }
+      })
+    }
+    assert(result.synchronized { source === result.sorted.toArray })
+  }
+
+}
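
[Editor's note] The scaladoc added in this patch requires keyed output to arrive as two StringType columns named "key" and "value". A rough sketch of shaping a query that way, under the same placeholder session, broker, and topic assumptions as the earlier note:

    import java.util.Properties

    import org.apache.spark.sql.{KafkaForeachWriter, Row}

    val producerProps = new Properties()
    producerProps.setProperty("metadata.broker.list", "localhost:9092")  // placeholder broker list

    // Reshape an arbitrary streaming DataFrame into the key/value layout the writer expects.
    val lines = spark.readStream
      .format("socket")
      .option("host", "localhost")
      .option("port", 9999)
      .load()
    val keyed = lines.selectExpr("value AS key", "value AS value")

    val query = keyed.writeStream
      .outputMode("append")
      .foreach(new KafkaForeachWriter[Row](producerProps, Set("keyed-topic"), keyed.schema))
      .start()

Here selectExpr simply duplicates the column to satisfy the key/value contract; a real query would compute a meaningful key.
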
From b8e41e5861954f2ea9c0ca0f7b78023442e8110f Mon Sep 17 00:00:00 2001
From: zhangxinyu1 <342689740@qq.com>
Date: Fri, 14 Oct 2016 19:17:06 +0800
Subject: [PATCH 3/3] correct code-style bug

---
 .../main/scala/org/apache/spark/sql/KafkaForeachWriter.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala
index 13b674448db68..b78a7fff51256 100644
--- a/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/sql/KafkaForeachWriter.scala
@@ -77,8 +77,7 @@ class KafkaForeachWriter[T](
           s"Producer sends messages Failed! KafkaForeachWriter supports only StringType value, " +
             s"and you have ${schema(0).dataType} and ${schema(1).dataType} value.")
       }
-      if (schema.fieldNames.contains("key") && schema.fieldNames.contains("value")) {
-      } else {
+      if (!schema.fieldNames.contains("key") || !schema.fieldNames.contains("value")) {
         throw new AnalysisException(
           s"""Producer sends messages Failed! If there are two columns in Row, fieldsNames
              should be "key" and "value" .