Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
33 lines (23 sloc) 899 Bytes
import java.util.UUID
import org.apache.spark.sql.streaming.ProcessingTime
import util.Boot
/**
 * Streaming job that subscribes to a fixed list of Kafka topics and appends
 * every record, as its topic name plus the value cast to a string, to CSV
 * files under `/tmp/data`, one sub-directory per topic.
 *
 * The `spark` handle is not defined in this object; presumably it is provided
 * by `Boot` (TODO confirm).
 */
object KafkaToHdfsUsingSpark extends Boot {

  // A new random directory name is generated every time this object is initialised.
  // NOTE(review): since the location changes per run, a restarted job presumably
  // cannot pick up the previous run's checkpoint and, with "earliest" offsets,
  // would read the topics from the beginning again; confirm this is intended.
  val checkpointLocation = s"/tmp/temporary-${UUID.randomUUID()}"

  // Kafka source reduced to the two columns that end up in the output files.
  val upstream = {
    val kafkaOptions = Map(
      "kafka.bootstrap.servers" -> "localhost:9092",
      "subscribe" -> "test,Airport,Airports,Carriers,Planedata",
      "startingOffsets" -> "earliest"
    )
    val kafkaRecords = spark.readStream
      .format("kafka")
      .options(kafkaOptions)
      .load()
    kafkaRecords.selectExpr("topic", "CAST(value AS STRING)")
  }

  // CSV sink in append mode, triggered with a 3000 ms processing-time interval.
  val downstream = {
    val csvSink = upstream.writeStream
      .format("csv")
      .outputMode("append")
      // One output directory per topic value, e.g. topic=Airport, topic=Carriers, topic=Planedata.
      .partitionBy("topic")
      // NOTE(review): the ProcessingTime case class is deprecated in newer Spark
      // releases in favour of Trigger.ProcessingTime; migrate when the Spark
      // version in use allows it.
      .trigger(ProcessingTime(3000))
    csvSink
      .option("checkpointLocation", checkpointLocation)
      .option("path", "/tmp/data")
      .start()
  }

  // Block the calling thread until the streaming query stops or fails.
  downstream.awaitTermination()
}
You can’t perform that action at this time.