# Writing CSV data to Apache Kafka Console

In [1]:
import os
# Connector jars must be requested before the Spark JVM is launched, so this
# has to run before any SparkSession/SparkContext is created.
# - spark-sql-kafka-0-10 provides the "kafka" DataFrame source/sink that the
#   final write cell (`.format("kafka")`) relies on.
# - spark-streaming-kafka-0-10 is the package this notebook originally
#   requested; it is kept so nothing that depended on it stops working.
# `--packages` takes a comma-separated list of Maven coordinates.
os.environ['PYSPARK_SUBMIT_ARGS'] = (
    '--packages '
    'org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.1,'
    'org.apache.spark:spark-streaming-kafka-0-10_2.11:2.3.1 '
    'pyspark-shell'
)

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils

In [3]:
# Create (or reuse) the local SparkSession for this notebook.
# The session is bound to `spark` because the CSV-loading cell reads from
# `spark.read`; binding it to `pyspark` left `spark` undefined on a fresh
# kernel and also shadowed the `pyspark` package name.
spark = (
    SparkSession
    .builder
    .appName("WriteToKafka")
    .master("local[4]")  # run locally with 4 worker threads
    .getOrCreate()
)

In [4]:
# `SparkContext` is not exported by the star imports from pyspark.sql.types /
# pyspark.sql.functions, so it is imported here explicitly to keep this cell
# runnable on a fresh kernel.
from pyspark import SparkContext

# Reuse the context that already backs the session and quiet the log output.
sc = SparkContext.getOrCreate()
sc.setLogLevel("WARN")

In [5]:
# Read the advertising CSV; the first line of the file supplies the column names.
csv_reader = spark.read.format("csv").option("header", True)
df = csv_reader.load("data/Advertising.csv")

# Preview the first five rows.
df.show(5)

+---+-----+-----+---------+-----+
| ID|   TV|Radio|Newspaper|Sales|
+---+-----+-----+---------+-----+
|  1|230.1| 37.8|     69.2| 22.1|
|  2| 44.5| 39.3|     45.1| 10.4|
|  3| 17.2| 45.9|     69.3|  9.3|
|  4|151.5| 41.3|     58.5| 18.5|
|  5|180.8| 10.8|     58.4| 12.9|
+---+-----+-----+---------+-----+
only showing top 5 rows



#### Convert the dataset into the key/value layout that Kafka expects

In [6]:
# Copy the row identifier into a new trailing "key" column, then drop the
# original "ID" column so only the renamed copy remains.
with_key = df.withColumn("key", col("ID"))
df2 = with_key.drop("ID")
df2.show(5)

+-----+-----+---------+-----+---+
|   TV|Radio|Newspaper|Sales|key|
+-----+-----+---------+-----+---+
|230.1| 37.8|     69.2| 22.1|  1|
| 44.5| 39.3|     45.1| 10.4|  2|
| 17.2| 45.9|     69.3|  9.3|  3|
|151.5| 41.3|     58.5| 18.5|  4|
|180.8| 10.8|     58.4| 12.9|  5|
+-----+-----+---------+-----+---+
only showing top 5 rows



In [7]:
# Collapse the four measurement columns into one comma-separated "value"
# column, leaving a two-column (key, value) frame.
comma = lit(',')
value_col = concat(
    col("TV"), comma,
    col("Radio"), comma,
    col("Newspaper"), comma,
    col("Sales"),
).alias("value")

df3 = df2.select("key", value_col)
df3.show(5)

+---+--------------------+
|key|               value|
+---+--------------------+
|  1|230.1,37.8,69.2,22.1|
|  2| 44.5,39.3,45.1,10.4|
|  3|  17.2,45.9,69.3,9.3|
|  4|151.5,41.3,58.5,18.5|
|  5|180.8,10.8,58.4,12.9|
+---+--------------------+
only showing top 5 rows



In [8]:
# Batch-write the (key, value) rows to the "Deneme" topic on the local broker.
kafka_writer = (
    df3.write
    .format("kafka")
    .option("kafka.bootstrap.servers", "localhost:9092")
    .option("topic", "Deneme")
)
kafka_writer.save()