Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
hkropp
committed
May 10, 2015
1 parent
96e4460
commit 4a61654
Showing 3 changed files with 84 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,10 @@ | ||
# Spark Streaming Examples

### Links

* http://spark.apache.org/docs/latest/streaming-kafka-integration.html
* http://stackoverflow.com/questions/22338025/kafka-consumers-in-spark-streaming-parallel-consumption-in-worker-nodes
* http://stackoverflow.com/questions/22132968/run-spark-kafka-wordcount-java-example-without-run-example-script
* http://spark.apache.org/docs/latest/streaming-programming-guide.html
* https://issues.apache.org/jira/browse/SPARK-944
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package simpleexample | ||
|
||
import org.apache.hadoop.io.{LongWritable, Text} | ||
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat | ||
import org.apache.spark.{SparkContext, SparkConf} | ||
import org.apache.spark.sql.hive.HiveContext | ||
import org.apache.spark.streaming.{Seconds, StreamingContext} | ||
|
||
/* | ||
Submitting: | ||
spark-submit --master yarn-client \ | ||
--num-executors 2 \ | ||
--driver-memory 512m \ | ||
--executor-memory 512m \ | ||
--executor-cores 1 \ | ||
--class simpleexample.SparkFileExample \ | ||
spark-streaming-simple-example-0.1-SNAPSHOT.jar /spark_log | ||
*/ | ||
/**
 * Minimal Spark Streaming example: monitors a directory for newly
 * arriving text files and prints each (key, line) pair it reads.
 *
 * Expects exactly one command-line argument: the directory to monitor
 * (see the spark-submit invocation documented above).
 */
object SparkFileExample {

  def main(args: Array[String]): Unit = {
    // Guard clause: the directory to monitor is mandatory.
    if (args.length < 1) {
      System.err.println("Usage: <log-dir>")
      System.exit(1)
    }

    val inputDirectory = args(0)

    // Micro-batch interval of two seconds.
    val conf = new SparkConf().setAppName("SpoolDirSpark")
    val streamingContext = new StreamingContext(conf, Seconds(2))

    // Hive support is set up for the (currently disabled) INSERT below.
    val hiveContext = new HiveContext(streamingContext.sparkContext)
    import hiveContext.implicits._
    import hiveContext.sql

    // fileStream yields (LongWritable, Text) records; render both
    // halves as plain strings for printing.
    val fileLines = streamingContext
      .fileStream[LongWritable, Text, TextInputFormat](inputDirectory)
      .map { case (key, value) => (key.toString, value.toString) }

    fileLines.print()

    // TODO: persist parsed stock quotes into a partitioned Hive table.
    // fileLines.foreachRDD { rdd =>
    //   rdd.foreachPartition { partition =>
    //     partition.foreach { item =>
    //       val values = item.toString().split(",")
    //       val date = values(0)
    //       val open = values(1)
    //       val high = values(2)
    //       val low = values(3)
    //       val close = values(4)
    //       val volume = values(5)
    //       val adj_close = values(6)
    //       val year = date.split("-")(0)
    //       sql(f"INSERT INTO TABLE stocks PARTITION (year= '$year') VALUES ('$date', $open, $high, $low, $close, $volume, $adj_close);")
    //     }
    //   }
    // }

    // Run until externally stopped.
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
|