From b3b0ff118cac3c0a5a10f9912b383bb0665c9a1b Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Wed, 16 Jul 2014 00:03:04 -0700 Subject: [PATCH 01/12] [SPARK-1981] Add AWS Kinesis streaming support --- assembly/pom.xml | 10 + bin/run-kinesis-example | 60 +++ bin/run-kinesis-example.cmd | 90 +++++ .../src/main/scala/SparkApp.scala | 7 + dev/audit-release/sbt_app_kinesis/build.sbt | 30 ++ .../src/main/scala/SparkApp.scala | 33 ++ dev/create-release/create-release.sh | 2 + docs/streaming-custom-receivers.md | 4 +- docs/streaming-programming-guide.md | 65 +++- extras/spark-kinesis-asl/pom.xml | 90 +++++ .../streaming/JavaKinesisWordCount.java | 310 ++++++++++++++++ .../src/main/resources/log4j.properties | 42 +++ .../examples/streaming/KinesisWordCount.scala | 345 ++++++++++++++++++ .../streaming/kinesis/CheckpointState.scala | 52 +++ .../streaming/kinesis/KinesisReceiver.scala | 122 +++++++ .../kinesis/KinesisRecordProcessor.scala | 148 ++++++++ .../kinesis/KinesisRecordSerializer.scala | 54 +++ .../KinesisStringRecordSerializer.scala | 47 +++ .../streaming/kinesis/KinesisUtils.scala | 151 ++++++++ .../src/test/resources/log4j.properties | 42 +++ .../kinesis/KinesisReceiverSuite.scala | 267 ++++++++++++++ pom.xml | 8 + project/SparkBuild.scala | 21 +- 23 files changed, 1992 insertions(+), 8 deletions(-) create mode 100755 bin/run-kinesis-example create mode 100755 bin/run-kinesis-example.cmd create mode 100644 dev/audit-release/sbt_app_kinesis/build.sbt create mode 100644 dev/audit-release/sbt_app_kinesis/src/main/scala/SparkApp.scala create mode 100644 extras/spark-kinesis-asl/pom.xml create mode 100644 extras/spark-kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java create mode 100644 extras/spark-kinesis-asl/src/main/resources/log4j.properties create mode 100644 extras/spark-kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala create mode 100644 extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/CheckpointState.scala create mode 100644 extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala create mode 100644 extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala create mode 100644 extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordSerializer.scala create mode 100644 extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisStringRecordSerializer.scala create mode 100644 extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala create mode 100644 extras/spark-kinesis-asl/src/test/resources/log4j.properties create mode 100644 extras/spark-kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala diff --git a/assembly/pom.xml b/assembly/pom.xml index 0c60b66c3daca..60cc5aef67098 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -173,6 +173,16 @@ + + spark-kinesis-asl + + + org.apache.spark + spark-kinesis-asl_${scala.binary.version} + ${project.version} + + + bigtop-dist + + 4.0.0 + + org.apache.spark + spark-parent + 1.1.0-SNAPSHOT + ../../pom.xml + + + + org.apache.spark + spark-kinesis-asl_2.10 + jar + Spark Kinesis Integration + + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${project.version} + + + com.amazonaws + amazon-kinesis-client + 1.1.0 + + + com.amazonaws + 
aws-java-sdk + 1.8.3 + + + org.scalatest + scalatest_${scala.binary.version} + test + + + org.mockito + mockito-all + test + + + org.scalacheck + scalacheck_${scala.binary.version} + test + + + org.easymock + easymockclassextension + test + + + com.novocode + junit-interface + test + + + + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes + + + org.scalatest + scalatest-maven-plugin + + + + diff --git a/extras/spark-kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java b/extras/spark-kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java new file mode 100644 index 0000000000000..6f3a2454907ec --- /dev/null +++ b/extras/spark-kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.examples.streaming; + +import java.util.List; +import java.util.regex.Pattern; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.storage.StorageLevel; +import org.apache.spark.streaming.Duration; +import org.apache.spark.streaming.Milliseconds; +import org.apache.spark.streaming.api.java.JavaDStream; +import org.apache.spark.streaming.api.java.JavaPairDStream; +import org.apache.spark.streaming.api.java.JavaStreamingContext; +import org.apache.spark.streaming.dstream.DStream; +import org.apache.spark.streaming.kinesis.KinesisRecordSerializer; +import org.apache.spark.streaming.kinesis.KinesisStringRecordSerializer; +import org.apache.spark.streaming.kinesis.KinesisUtils; + +import scala.Tuple2; + +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; +import com.amazonaws.services.kinesis.AmazonKinesisClient; +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; +import com.google.common.base.Optional; +import com.google.common.collect.Lists; + +/** + * Java-friendly Kinesis Spark Streaming WordCount example + * + * See http://spark.apache.org/docs/latest/streaming-programming-guide.html for more details on the Kinesis Spark Streaming integration. + * + * This example spins up 1 Kinesis Worker (Spark Streaming Receivers) per shard of the given stream. + * It then starts pulling from the tip of the given and at the given . 
+ * Because we're pulling from the tip (InitialPositionInStream.LATEST), only new stream data will be picked up after the KinesisReceiver starts. + * This could lead to missed records if data is added to the stream while no KinesisReceivers are running. + * In production, you'll want to switch to InitialPositionInStream.TRIM_HORIZON which will read up to 24 hours (Kinesis limit) of previous stream data + * depending on the checkpoint frequency. + * InitialPositionInStream.TRIM_HORIZON may lead to duplicate processing of records depending on the checkpoint frequency. + * Record processing should be idempotent when possible. + * + * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials in the following order of precedence: + * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY + * Java System Properties - aws.accessKeyId and aws.secretKey + * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs + * Instance profile credentials - delivered through the Amazon EC2 metadata service + * + * Usage: JavaKinesisWordCount + * is the name of the Kinesis stream (ie. mySparkStream) + * is the endpoint of the Kinesis service (ie. https://kinesis.us-east-1.amazonaws.com) + * is the batch interval in milliseconds (ie. 1000ms) + * + * Example: + * $ export AWS_ACCESS_KEY_ID= + * $ export AWS_SECRET_KEY= + * $ bin/run-kinesis-example \ + * org.apache.spark.examples.streaming.JavaKinesisWordCount mySparkStream https://kinesis.us-east-1.amazonaws.com 1000 + * + * There is a companion helper class called KinesisWordCountProducer which puts dummy data onto the Kinesis stream. + * Usage instructions for KinesisWordCountProducer are provided in the class definition. + */ +public final class JavaKinesisWordCount { + private static final Pattern WORD_SEPARATOR = Pattern.compile(" "); + private static final Logger logger = Logger.getLogger(JavaKinesisWordCount.class); + + /** + * Make the constructor private to enforce singleton + */ + private JavaKinesisWordCount() { + } + + public static void main(String[] args) { + /** + * Check that all required args were passed in. + */ + if (args.length < 3) { + System.err.println("Usage: JavaKinesisWordCount "); + System.exit(1); + } + + /** + * (This was lifted from the StreamingExamples.scala in order to avoid the dependency on the spark-examples artifact.) + * Set reasonable logging levels for streaming if the user has not configured log4j. + */ + boolean log4jInitialized = Logger.getRootLogger().getAllAppenders() + .hasMoreElements(); + if (!log4jInitialized) { + /** We first log something to initialize Spark's default logging, then we override the logging level. */ + Logger.getRootLogger() + .info("Setting log level to [ERROR] for streaming example." 
+ + " To override add a custom log4j.properties to the classpath."); + Logger.getRootLogger().setLevel(Level.ERROR); + Logger.getLogger("org.apache.spark.examples.streaming").setLevel(Level.DEBUG); + } + + /** Populate the appropriate variables from the given args */ + String stream = args[0]; + String endpoint = args[1]; + Integer batchIntervalMillis = Integer.valueOf(args[2]); + + /** Create a Kinesis client in order to determine the number of shards for the given stream */ + AmazonKinesisClient KinesisClient = new AmazonKinesisClient( + new DefaultAWSCredentialsProviderChain()); + + /** Determine the number of shards from the stream */ + int numShards = KinesisClient.describeStream(stream) + .getStreamDescription().getShards().size(); + + /** In this example, we're going to create 1 Kinesis Worker/Receiver/DStreams for each stream shard */ + int numStreams = numShards; + + /** Must add 1 more thread than the number of receivers or the output won't show properly from the driver */ + int numSparkThreads = numStreams + 1; + + /** Set the app name */ + String app = "KinesisWordCount"; + + /** Setup the Spark config. */ + SparkConf sparkConfig = new SparkConf().setAppName(app).setMaster( + "local[" + numSparkThreads + "]"); + + /** + * Set the batch interval. + * Records will be pulled from the Kinesis stream and stored as a single DStream within Spark every batch interval. + */ + Duration batchInterval = Milliseconds.apply(batchIntervalMillis); + + /** + * It's recommended that you perform a Spark checkpoint between 5 and 10 times the batch interval. + * While this is the Spark checkpoint interval, we're going to use it for the Kinesis checkpoint interval, as well. + */ + Duration checkpointInterval = batchInterval.$times(5); + + /** Setup the StreamingContext */ + JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval); + + /** Setup the checkpoint directory used by Spark Streaming */ + jssc.checkpoint("/tmp/checkpoint"); + + /** Create the same number of Kinesis Receivers/DStreams as stream shards, then union them all */ + JavaDStream allStreams = KinesisUtils + .createJavaStream(jssc, app, stream, endpoint, checkpointInterval.milliseconds(), + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2()); + /** Set the checkpoint interval */ + allStreams.checkpoint(checkpointInterval); + for (int i = 1; i < numStreams; i++) { + /** Create a new Receiver/DStream for each stream shard */ + JavaDStream dStream = KinesisUtils + .createJavaStream(jssc, app, stream, endpoint, checkpointInterval.milliseconds(), + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2()); + /** Set the Spark checkpoint interval */ + dStream.checkpoint(checkpointInterval); + + /** Union with the existing streams */ + allStreams = allStreams.union(dStream); + } + + /** This implementation uses the String-based KinesisRecordSerializer impl */ + final KinesisRecordSerializer recordSerializer = new KinesisStringRecordSerializer(); + + /** + * Split each line of the union'd DStreams into multiple words using flatMap to produce the collection. + * Convert lines of byte[] to multiple Strings by first converting to String, then splitting on WORD_SEPARATOR + * We're caching the result here so that we can use it later without having to re-materialize the underlying RDDs. 
+ */ + JavaDStream words = allStreams + .flatMap(new FlatMapFunction() { + /** + * Convert lines of byte[] to multiple words split by WORD_SEPARATOR + * @param byte array + * @return iterable of words split by WORD_SEPARATOR + */ + @Override + public Iterable call(byte[] line) { + return Lists.newArrayList(WORD_SEPARATOR.split(recordSerializer.deserialize(line))); + } + }).cache(); + + /** windowInterval must be a multiple of the batchInterval */ + Duration windowInterval = batchInterval.$times(5); + + /** slideInterval must be a multiple of the batchInterval */ + Duration slideInterval = batchInterval.$times(1); + + /** + * Map each word to a (word, 1) tuple so we can reduce/aggregate later. + * We're caching the result here so that we can use it later without having + * to re-materialize the underlying RDDs. + */ + JavaPairDStream wordCounts = words.mapToPair( + new PairFunction() { + /** + * Create the (word, 1) tuple + * @param word + * @return (word, 1) tuple + */ + @Override + public Tuple2 call(String s) { + return new Tuple2(s, 1); + } + }); + + /** + * Reduce/aggregate by key + * We're caching the result here so that we can use it later without having + * to re-materialize the underlying RDDs. + */ + JavaPairDStream wordCountsByKey = wordCounts.reduceByKey( + new Function2() { + @Override + public Integer call(Integer i1, Integer i2) { + return i1 + i2; + } + }).cache(); + + /** + * Reduce/aggregate by key for the given window. + * We're using the inverse-function (left - right) optimization over the sliding window per the Window Operations described at the following url: + * http://spark.apache.org/docs/latest/streaming-programming-guide.html#transformations + */ + JavaPairDStream wordCountsByKeyAndWindow = wordCountsByKey.reduceByKeyAndWindow( + new Function2() { + @Override + public Integer call(Integer i1, Integer i2) { + return i1 + i2; + } + }, windowInterval, slideInterval); + + /** + * Sort and print the word counts by key and window. + * This is an Output Operation and will materialize the DStream. + */ + sortAndPrint("Word Counts By Key and Window", wordCountsByKeyAndWindow); + + Function2, Optional, Optional> updateTotals = + new Function2, Optional, Optional>() { + @Override public Optional call(List newCounts, Optional currentCount) { + Integer currentSum = 0; + if (currentCount.isPresent()) { + currentSum = currentCount.get(); + } + Integer newSum = currentSum; + + for (Integer newCount : newCounts) { + newSum += newCount; + } + return Optional.of(newSum); + } + }; + + /** + * Calculate the running totals using the updateTotals method. + */ + JavaPairDStream wordTotalsByKey = wordCountsByKey.updateStateByKey(updateTotals); + + /** + * Sort and print the running word totals. + * This is an Output Operation and will materialize the DStream. + */ + sortAndPrint("Word Count Totals By Key", wordTotalsByKey); + + /** Start the streaming context and await termination */ + jssc.start(); + jssc.awaitTermination(); + } + + /** + * Sort and print the given dstream. + * This is an Output Operation that will materialize the underlying DStream. + * Everything up to this point is a lazy Transformation Operation. 
+ * + * @param description of the dstream for logging purposes + * @param dstream to sort and print + */ + private static void sortAndPrint(final String description, JavaPairDStream dstream) { + dstream.foreachRDD( + new Function, Void>() { + public Void call(JavaPairRDD batch) { + JavaPairRDD sortedBatch = batch.sortByKey(true); + logger.info(description); + for (Object wordCount: sortedBatch.collect()) { + logger.info(wordCount); + } + + return null; + } + }); + } +} diff --git a/extras/spark-kinesis-asl/src/main/resources/log4j.properties b/extras/spark-kinesis-asl/src/main/resources/log4j.properties new file mode 100644 index 0000000000000..ad789341e62c9 --- /dev/null +++ b/extras/spark-kinesis-asl/src/main/resources/log4j.properties @@ -0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the file streaming/target/unit-tests.log +log4j.rootCategory=WARN, console + +# File appender +log4j.appender.file=org.apache.log4j.FileAppender +log4j.appender.file.append=false +log4j.appender.file.file=target/unit-tests.log +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n + +# Console appender +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.out +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Settings to quiet third party logs that are too verbose +log4j.logger.org.eclipse.jetty=WARN +log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR +log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO + +# Log all Kinesis Streaming messages +log4j.logger.org.apache.spark.examples.streaming=DEBUG +log4j.logger.org.apache.spark.streaming.kinesis=DEBUG diff --git a/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala new file mode 100644 index 0000000000000..0a0cccb49433d --- /dev/null +++ b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.streaming + +import java.nio.ByteBuffer +import org.apache.log4j.Level +import org.apache.log4j.Logger +import org.apache.spark.Logging +import org.apache.spark.SparkConf +import org.apache.spark.SparkContext._ +import org.apache.spark.streaming.Milliseconds +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions +import org.apache.spark.streaming.kinesis.KinesisStringRecordSerializer +import org.apache.spark.streaming.kinesis.KinesisUtils +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain +import com.amazonaws.services.kinesis.AmazonKinesisClient +import com.amazonaws.services.kinesis.model.PutRecordRequest +import scala.util.Random +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.dstream.ReceiverInputDStream +import org.apache.spark.streaming.dstream.DStream + +/** + * Kinesis Spark Streaming WordCount example. + * + * See http://spark.apache.org/docs/latest/streaming-programming-guide.html for more details on the Kinesis Spark Streaming integration. + * + * This example spins up 1 Kinesis Worker (Spark Streaming Receivers) per shard of the given stream. + * It then starts pulling from the tip of the given and at the given . + * Because we're pulling from the tip (InitialPositionInStream.LATEST), only new stream data will be picked up after the KinesisReceiver starts. + * This could lead to missed records if data is added to the stream while no KinesisReceivers are running. + * In production, you'll want to switch to InitialPositionInStream.TRIM_HORIZON which will read up to 24 hours (Kinesis limit) of previous stream data + * depending on the checkpoint frequency. + * + * InitialPositionInStream.TRIM_HORIZON may lead to duplicate processing of records depending on the checkpoint frequency. + * Record processing should be idempotent when possible. + * + * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials in the following order of precedence: + * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY + * Java System Properties - aws.accessKeyId and aws.secretKey + * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs + * Instance profile credentials - delivered through the Amazon EC2 metadata service + * + * Usage: KinesisWordCount + * is the name of the Kinesis stream (ie. mySparkStream) + * is the endpoint of the Kinesis service (ie. https://kinesis.us-east-1.amazonaws.com) + * is the batch interval in millis (ie. 1000ms) + * + * Example: + * $ export AWS_ACCESS_KEY_ID= + * $ export AWS_SECRET_KEY= + * $ bin/run-kinesis-example \ + * org.apache.spark.examples.streaming.KinesisWordCount mySparkStream https://kinesis.us-east-1.amazonaws.com 100 + * + * There is a companion helper class below called KinesisWordCountProducer which puts dummy data onto the Kinesis stream. + * Usage instructions for KinesisWordCountProducer are provided in that class definition. 
+ */ +object KinesisWordCount extends Logging { + val WordSeparator = " " + + def main(args: Array[String]) { +/** + * Check that all required args were passed in. + */ + if (args.length < 3) { + System.err.println("Usage: KinesisWordCount ") + System.exit(1) + } + + /** + * (This was lifted from the StreamingExamples.scala in order to avoid the dependency on the spark-examples artifact.) + * Set reasonable logging levels for streaming if the user has not configured log4j. + */ + val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements + if (!log4jInitialized) { + /** We first log something to initialize Spark's default logging, then we override the logging level. */ + logInfo("Setting log level to [INFO] for streaming example." + + " To override add a custom log4j.properties to the classpath.") + + Logger.getRootLogger().setLevel(Level.INFO) + Logger.getLogger("org.apache.spark.examples.streaming").setLevel(Level.DEBUG); + } + + /** Populate the appropriate variables from the given args */ + val Array(stream, endpoint, batchIntervalMillisStr) = args + val batchIntervalMillis = batchIntervalMillisStr.toInt + + /** Create a Kinesis client in order to determine the number of shards for the given stream */ + val KinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()); + + /** Determine the number of shards from the stream */ + val numShards = KinesisClient.describeStream(stream).getStreamDescription().getShards().size() + + /** In this example, we're going to create 1 Kinesis Worker/Receiver/DStreams for each stream shard */ + val numStreams = numShards + + /** Must add 1 more thread than the number of receivers or the output won't show properly from the driver */ + val numSparkThreads = numStreams + 1 + + /** Set the app name */ + val app = "KinesisWordCount" + + /** Setup the Spark config. */ + val sparkConfig = new SparkConf().setAppName(app).setMaster(s"local[$numSparkThreads]") + + /** + * Set the batch interval. + * Records will be pulled from the Kinesis stream and stored as a single DStream within Spark every batch interval. + */ + val batchInterval = Milliseconds(batchIntervalMillis) + + /** + * It's recommended that you perform a Spark checkpoint between 5 and 10 times the batch interval. + * While this is the Spark checkpoint interval, we're going to use it for the Kinesis checkpoint interval, as well. 
+ */ + val checkpointInterval = batchInterval * 5 + + /** Setup the StreamingContext */ + val ssc = new StreamingContext(sparkConfig, batchInterval) + + /** Setup the checkpoint directory used by Spark Streaming */ + ssc.checkpoint("/tmp/checkpoint"); + + /** Create the same number of Kinesis Receivers/DStreams as stream shards, then union them all */ + var allStreams: DStream[Array[Byte]] = KinesisUtils.createStream(ssc, app, stream, endpoint, checkpointInterval.milliseconds, + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2) + /** Set the checkpoint interval */ + allStreams.checkpoint(checkpointInterval) + for (i <- 1 until numStreams) { + /** Create a new Receiver/DStream for each stream shard */ + val dStream = KinesisUtils.createStream(ssc, app, stream, endpoint, checkpointInterval.milliseconds, + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2) + /** Set the Spark checkpoint interval */ + dStream.checkpoint(checkpointInterval) + + /** Union with the existing streams */ + allStreams = allStreams.union(dStream) + } + + /** This implementation uses the String-based KinesisRecordSerializer impl */ + val recordSerializer = new KinesisStringRecordSerializer() + + /** + * Sort and print the given dstream. + * This is an Output Operation that will materialize the underlying DStream. + * Everything up to this point is a lazy Transformation Operation. + * + * @param description of the dstream for logging purposes + * @param dstream to sort and print + */ + def sortAndPrint(description: String, dstream: DStream[(String,Int)]) = { + dstream.foreachRDD((batch, endOfWindowTime) => { + val sortedBatch = batch.sortByKey(true) + logInfo(s"$description @ $endOfWindowTime") + sortedBatch.collect().foreach( + wordCount => logInfo(s"$wordCount")) + }) + } + + /** + * Split each line of the union'd DStreams into multiple words using flatMap to produce the collection. + * Convert lines of Array[Byte] to multiple Strings by first converting to String, then splitting on WORD_SEPARATOR + * We're caching the result here so that we can use it later without having to re-materialize the underlying RDDs. + */ + val words = allStreams.flatMap(line => recordSerializer.deserialize(line).split(WordSeparator)).cache() + + /** windowInterval must be a multiple of the batchInterval */ + val windowInterval = batchInterval * 5 + + /** slideInterval must be a multiple of the batchInterval */ + val slideInterval = batchInterval * 1 + + /** + * Map each word to a (word, 1) tuple so we can reduce/aggregate later. + * We're caching the result here so that we can use it later without having + * to re-materialize the underlying RDDs. + */ + val wordCounts = words.map(word => (word, 1)) + + /** + * Reduce/aggregate by key. + * We're caching the result here so that we can use it later without having + * to re-materialize the underlying RDDs. + */ + val wordCountsByKey = wordCounts.reduceByKey((left, right) => left + right) + + /** + * Reduce/aggregate by key for the given window. + * We're using the inverse-function (left - right) optimization over the sliding window per the Window Operations described at the following url: + * http://spark.apache.org/docs/latest/streaming-programming-guide.html#transformations + */ + val wordCountsByKeyAndWindow = wordCountsByKey.reduceByKeyAndWindow((left, right) => left + right, (left, right) => left - right, windowInterval, slideInterval) + + /** + * Sort and print the word counts by key and window. + * This is an Output Operation and will materialize the DStream. 
+ * + */ + sortAndPrint("Word Counts By Key and Window", wordCountsByKeyAndWindow) + + /** + * Update the running totals of words. + * + * @param sequence of new counts + * @param current running total (could be None if no current count exists) + */ + def updateTotals = (newCounts: Seq[Int], currentCounts: Option[Int]) => { + val newCount = newCounts.foldLeft(0)((left, right) => left + right) + val currentCount = currentCounts.getOrElse(0) + Some(newCount + currentCount) + } + + /** + * Calculate the running totals using the updateTotals method. + */ + val wordTotalsByKey = wordCountsByKey.updateStateByKey[Int](updateTotals) + + /** + * Sort and print the running word totals. + * This is an Output Operation and will materialize the DStream. + */ + sortAndPrint("Word Count Totals By Key", wordTotalsByKey) + + /** Start the streaming context and await termination */ + ssc.start() + ssc.awaitTermination() + } +} + +/** + * Usage: KinesisWordCountProducer + * is the name of the Kinesis stream (ie. mySparkStream) + * is the endpoint of the Kinesis service (ie. https://kinesis.us-east-1.amazonaws.com) + * is the rate of records per second to put onto the stream + * is the rate of records per second to put onto the stream + * + * Example: + * $ export AWS_ACCESS_KEY_ID= + * $ export AWS_SECRET_KEY= + * $ bin/run-kinesis-example \ + * org.apache.spark.examples.streaming.KinesisWordCountProducer mySparkStream https://kinesis.us-east-1.amazonaws.com 10 5 + */ +private[streaming] +object KinesisWordCountProducer extends Logging { + val MaxRandomInts = 10 + + def main(args: Array[String]) { + if (args.length < 4) { + System.err.println("Usage: KinesisWordCountProducer ") + System.exit(1) + } + + /** + * (This was lifted from the StreamingExamples.scala in order to avoid the dependency on the spark-examples artifact.) + * Set reasonable logging levels for streaming if the user has not configured log4j. + */ + val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements + if (!log4jInitialized) { + /** We first log something to initialize Spark's default logging, then we override the logging level. */ + logInfo("Setting log level to [INFO] for streaming example." 
+ + " To override add a custom log4j.properties to the classpath.") + + Logger.getRootLogger().setLevel(Level.INFO) + Logger.getLogger("org.apache.spark.examples.streaming").setLevel(Level.DEBUG); + } + + /** Populate the appropriate variables from the given args */ + val Array(stream, endpoint, recordsPerSecond, wordsPerRecord) = args + + /** Generate the records and return the totals */ + val totals: Seq[(Int, Int)] = generate(stream, endpoint, recordsPerSecond.toInt, wordsPerRecord.toInt) + + logInfo("Totals") + /** Print the array of (index, total) tuples */ + totals.foreach(total => logInfo(total.toString())) + } + + def generate(stream: String, endpoint: String, recordsPerSecond: Int, wordsPerRecord: Int): Seq[(Int, Int)] = { + val WORD_SEPARATOR = " " + + /** Create the Kinesis client */ + val KinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) + + logInfo(s"Putting records onto stream $stream and endpoint $endpoint at a rate of $recordsPerSecond records per second and $wordsPerRecord words per record"); + + /** Create the String-based record serializer */ + val recordSerializer = new KinesisStringRecordSerializer() + + val totals = new Array[Int](MaxRandomInts) + /** Put String records onto the stream per the given recordPerSec and wordsPerRecord */ + for (i <- 1 to 5) { + /** Generate recordsPerSec records to put onto the stream */ + val records = (1 to recordsPerSecond.toInt).map { recordNum => + /** Randomly generate each wordsPerRec words between 0 (inclusive) and MAX_RANDOM_INTS (exclusive) */ + val data = (1 to wordsPerRecord.toInt).map(x => { + /** Generate the random int */ + val randomInt = Random.nextInt(MaxRandomInts) + + /** Keep track of the totals */ + totals(randomInt) += 1 + + /** Convert the Int to a String */ + randomInt.toString() + }) + /** Create a String of randomInts separated by WORD_SEPARATOR */ + .mkString(WORD_SEPARATOR) + + /** Create a partitionKey based on recordNum */ + val partitionKey = s"partitionKey-$recordNum" + + /** Create a PutRecordRequest with an Array[Byte] version of the data */ + val putRecordRequest = new PutRecordRequest().withStreamName(stream).withPartitionKey(partitionKey) + .withData(ByteBuffer.wrap(recordSerializer.serialize(data))); + + /** Put the record onto the stream and capture the PutRecordResult */ + val putRecordResult = KinesisClient.putRecord(putRecordRequest); + + logInfo(s"Successfully put record with partitionKey $partitionKey and shardId ${putRecordResult.getShardId()} and data $data") + } + + /** Sleep for a second */ + Thread.sleep(1000) + } + + /** Convert the totals to (index, total) tuple */ + (0 to (MaxRandomInts - 1)).zip(totals) + } +} diff --git a/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/CheckpointState.scala b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/CheckpointState.scala new file mode 100644 index 0000000000000..a28d022cb61c8 --- /dev/null +++ b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/CheckpointState.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.streaming.kinesis + +import org.apache.spark.Logging +import org.apache.spark.streaming.util.Clock +import org.apache.spark.streaming.util.ManualClock +import org.apache.spark.streaming.util.SystemClock + +/** + * This is a helper class for managing checkpoint clocks. + * + * @param checkpoint interval in millis + * @param current clock. if none specified, will default to current SystemClock + */ +class CheckpointState(checkpointIntervalMillis: Long, currentClock: Clock = new SystemClock()) extends Logging { + /** + * Initialize the checkpoint clock using the given currentClock + checkpointIntervalMillis + */ + val checkpointClock = new ManualClock() + checkpointClock.setTime(currentClock.currentTime() + checkpointIntervalMillis) + + /** + * Check if it's time to checkpoint based on the current time and the derived time for the next checkpoint + * + * @return true if it's time to checkpoint + */ + def shouldCheckpoint(): Boolean = { + new SystemClock().currentTime() > checkpointClock.currentTime() + } + + /** + * Advance the checkpoint clock by the checkpoint interval. + */ + def advanceCheckpoint() = { + checkpointClock.addToTime(checkpointIntervalMillis) + } +} diff --git a/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala new file mode 100644 index 0000000000000..98eed6eb196d9 --- /dev/null +++ b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.streaming.kinesis + +import java.net.InetAddress +import java.util.UUID +import org.apache.spark.Logging +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.receiver.Receiver +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain +import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor +import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker +import java.nio.ByteBuffer +import org.apache.spark.streaming.util.SystemClock + +/** + * Custom AWS Kinesis-specific implementation of Spark Streaming's Receiver. + * This implementation relies on the Kinesis Client Library (KCL) Worker as described here: + * https://github.com/awslabs/amazon-kinesis-client + * This is a custom receiver used with StreamingContext.receiverStream(Receiver) as described here: + * http://spark.apache.org/docs/latest/streaming-custom-receivers.html + * Instances of this class will get shipped to the Spark Streaming Workers to run within a Spark Executor. + * + * @param app name + * @param Kinesis stream name + * @param endpoint url of Kinesis service + * @param checkpoint interval (millis) for Kinesis checkpointing (not Spark checkpointing). + * See the Kinesis Spark Streaming documentation for more details on the different types of checkpoints. + * @param in the absence of Kinesis checkpoint info, this is the worker's initial starting position in the stream. + * The values are either the beginning of the stream per Kinesis' limit of 24 hours (InitialPositionInStream.TRIM_HORIZON) + * or the tip of the stream using InitialPositionInStream.LATEST. + * @param persistence strategy for RDDs and DStreams. + */ +private[streaming] class KinesisReceiver( + app: String, + stream: String, + endpoint: String, + checkpointIntervalMillis: Long, + initialPositionInStream: InitialPositionInStream, + storageLevel: StorageLevel) + extends Receiver[Array[Byte]](storageLevel) with Logging { receiver => + + /** + * The lazy val's below will get instantiated in the remote Executor after the closure is shipped to the Spark Worker. + * These are all lazy because they're from third-party Amazon libraries and are not Serializable. + * If they're not marked lazy, they will cause NotSerializableExceptions when they're shipped to the Spark Worker. + */ + + /** + * workerId is lazy because we want the address of the actual Worker where the code runs - not the Driver's ip address. + * This makes a difference when running in a cluster. 
+ */ + lazy val workerId = InetAddress.getLocalHost.getHostAddress() + ":" + UUID.randomUUID() + + /** + * This impl uses the DefaultAWSCredentialsProviderChain per the following url: + * http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html + * and searches for credentials in the following order of precedence: + * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY + * Java System Properties - aws.accessKeyId and aws.secretKey + * Credential profiles file at the default location (~/.aws/credentials) shared by all AWS SDKs and the AWS CLI + * Instance profile credentials delivered through the Amazon EC2 metadata service + */ + lazy val credentialsProvider = new DefaultAWSCredentialsProviderChain() + + /** Create a KCL config instance. */ + lazy val KinesisClientLibConfiguration = new KinesisClientLibConfiguration(app, stream, credentialsProvider, workerId) + .withKinesisEndpoint(endpoint).withInitialPositionInStream(initialPositionInStream).withTaskBackoffTimeMillis(500) + + /** + * RecordProcessorFactory creates impls of IRecordProcessor. + * IRecordProcessor adapts the KCL to our Spark KinesisReceiver via the IRecordProcessor.processRecords() method. + * We're using our custom KinesisRecordProcessor in this case. + */ + lazy val recordProcessorFactory: IRecordProcessorFactory = new IRecordProcessorFactory { + override def createProcessor: IRecordProcessor = new KinesisRecordProcessor(receiver, workerId, KinesisUtils.createCheckpointState(checkpointIntervalMillis)) + } + + /** + * Create a Kinesis Worker. + * This is the core client abstraction from the Kinesis Client Library (KCL). + * We pass the RecordProcessorFactory from above as well as the KCL config instance. + * A Kinesis Worker can process 1..* shards from the given stream - each with its own RecordProcessor. + */ + lazy val worker: Worker = new Worker(recordProcessorFactory, KinesisClientLibConfiguration); + + /** + * This is called when the KinesisReceiver starts and must be non-blocking. + * The KCL creates and manages the receiving/processing thread pool through the Worker.run() method. + */ + override def onStart() { + logInfo(s"Starting receiver with workerId $workerId") + worker.run() + } + + /** + * This is called when the KinesisReceiver stops. + * The KCL worker.shutdown() method stops the receiving/processing threads. + * The KCL will do its best to drain and checkpoint any in-flight records upon shutdown. + */ + override def onStop() { + logInfo(s"Shutting down receiver with workerId $workerId") + worker.shutdown() + } +} diff --git a/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala new file mode 100644 index 0000000000000..8dd24501fe381 --- /dev/null +++ b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.streaming.kinesis + +import java.util.List +import scala.collection.JavaConversions.asScalaBuffer +import scala.collection.mutable.ArrayBuffer +import org.apache.spark.Logging +import org.apache.spark.streaming.util.ManualClock +import org.apache.spark.streaming.util.SystemClock +import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor +import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer +import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason +import com.amazonaws.services.kinesis.model.Record +import scala.compat.Platform +import org.apache.spark.streaming.util.Clock + +/** + * Kinesis-specific implementation of the Kinesis Client Library (KCL) IRecordProcessor. + * This implementation operates on the Array[Byte] from the KinesisReceiver. + * The Kinesis Worker creates an instance of this KinesisRecordProcessor upon startup. + * + * @param Kinesis receiver + * @param workerId for logging purposes + * @param checkpoint utils + * @param Kinesis checkpoint interval (millis) + */ +private[streaming] class KinesisRecordProcessor( + receiver: KinesisReceiver, + workerId: String, + checkpointState: CheckpointState) extends IRecordProcessor with Logging { + + /** shardId to be populated during initialize() */ + var shardId: String = _ + + /** + * The Kinesis Client Library calls this method during IRecordProcessor initialization. + * + * @param shardId assigned by the KCL to this particular RecordProcessor. + */ + override def initialize(shardId: String) { + logInfo(s"Initialize: Initializing workerId $workerId with shardId $shardId") + + this.shardId = shardId + } + + /** + * This method is called by the KCL when a batch of records is pulled from the Kinesis stream. + * This is the record-processing bridge between the KCL's IRecordProcessor.processRecords() + * and Spark Streaming's Receiver.store(). + * + * @param list of records from the Kinesis stream shard + * @param checkpointer used to update Kinesis when this batch has been processed/stored in the DStream + */ + override def processRecords(batch: List[Record], checkpointer: IRecordProcessorCheckpointer) { + if (!receiver.isStopped()) { + try { + /** + * Convert the list of records to a list of Array[Byte] + * Note: If we try to store the raw ByteBuffer from record.getData(), the Spark Streaming + * Receiver.store(ByteBuffer) attempts to deserialize the ByteBuffer using the + * internally-configured Spark serializer (kryo, etc). + * This is not desirable, so we instead store a raw Array[Byte] and decouple + * ourselves from the internal serialization strategy. + */ + val batchByteArrays = new ArrayBuffer[Array[Byte]](batch.size()) + batchByteArrays ++= batch.map(record => record.getData().array()) + + /** Store the list of Array[Byte] in Spark */ + KinesisUtils.retry(receiver.store(batchByteArrays), 4, 500) + logDebug(s"Stored: Worker $workerId stored ${batch.size} records for shardId $shardId") + + /** + * Checkpoint the sequence number of the last record successfully processed/stored in the batch. 
+ * In this implementation, we're checkpointing after the given checkpointIntervalMillis. + */ + if (checkpointState.shouldCheckpoint()) { + /** Perform the checkpoint */ + KinesisUtils.retry(checkpointer.checkpoint(), 4, 500) + + /** Update the next checkpoint time */ + checkpointState.advanceCheckpoint() + + logDebug(s"Checkpoint: WorkerId $workerId completed checkpoint of ${batch.size} records for shardId $shardId") + logDebug(s"Checkpoint: Next checkpoint is at ${checkpointState.checkpointClock.currentTime()} for shardId $shardId") + } + } catch { + case e: Throwable => { + /** + * If there is a failure within the batch, the batch will not be checkpointed. + * This will potentially cause records since the last checkpoint to be processed more than once. + */ + logError(s"Exception: WorkerId $workerId encountered and exception while storing or checkpointing a batch for workerId $workerId and shardId $shardId.", e) + + /** Rethrow the exception to the Kinesis Worker that is managing this RecordProcessor. */ + throw e + } + } + } else { + /** RecordProcessor has been stopped. */ + logInfo(s"Stopped: The Spark KinesisReceiver has stopped for workerId $workerId and shardId $shardId. No more records will be processed.") + } + } + + /** + * Kinesis Client Library is shutting down this Worker for 1 of 2 reasons: + * 1) the stream is resharding by splitting or merging adjacent shards (ShutdownReason.TERMINATE) + * 2) the failed or latent Worker has stopped sending heartbeats for whatever reason (ShutdownReason.ZOMBIE) + * + * @param checkpointer used to performn a Kinesis checkpoint for ShutdownReason.TERMINATE + * @param shutdown reason (ShutdownReason.TERMINATE or ShutdownReason.ZOMBIE) + */ + override def shutdown(checkpointer: IRecordProcessorCheckpointer, reason: ShutdownReason) { + logInfo(s"Shutdown: Shutting down workerId $workerId with reason $reason") + reason match { + /** + * TERMINATE Use Case. Checkpoint. + * Checkpoint to indicate that all records from the shard have been drained and processed. + * It's now OK to read from the new shards that resulted from a resharding event. + */ + case ShutdownReason.TERMINATE => KinesisUtils.retry(checkpointer.checkpoint(), 4, 500) + + /** + * ZOMBIE Use Case. NoOp. + * No checkpoint because other workers may have taken over and already started processing the same records. + * This may lead to records being processed more than once. + */ + case ShutdownReason.ZOMBIE => + + /** Unknown reason. NoOp */ + case _ => + } + } +} diff --git a/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordSerializer.scala b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordSerializer.scala new file mode 100644 index 0000000000000..172c9b14eebca --- /dev/null +++ b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordSerializer.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.streaming.kinesis + +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.receiver.Receiver +import com.amazonaws.auth.AWSCredentialsProvider +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain +import org.apache.spark.SparkConf +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.SparkContext +import org.apache.spark.streaming.Duration +import org.apache.spark.streaming.Seconds +import org.apache.spark.streaming.dstream.ReceiverInputDStream +import scala.reflect.ClassTag +import org.apache.spark.streaming.api.java.JavaStreamingContext +import org.apache.spark.streaming.api.java.JavaReceiverInputDStream +import java.nio.ByteBuffer + +/** + * Convert custom types to/from Array[Byte]. + * @tparam type to serialize/deserialize + */ +private[streaming] trait KinesisRecordSerializer[T] extends Serializable { + /** + * Convert type to Array[Byte] + * + * @param type to serialize + * @return byte array + */ + def serialize(t: T): Array[Byte] + + /** + * Convert Array[Byte] to type + * + * @param byte array + * @return deserialized type + */ + def deserialize(array: Array[Byte]): T +} diff --git a/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisStringRecordSerializer.scala b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisStringRecordSerializer.scala new file mode 100644 index 0000000000000..4fd9c39b3c535 --- /dev/null +++ b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisStringRecordSerializer.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.streaming.kinesis + +import java.nio.ByteBuffer +import java.nio.charset.Charset +import java.nio.CharBuffer +import org.apache.spark.Logging + +/** + * Implementation of KinesisRecordSerializer to convert Array[Byte] to/from String. 
+ */ +class KinesisStringRecordSerializer extends KinesisRecordSerializer[String] with Logging { + /** + * Convert String to Array[Byte] + * + * @param string to serialize + * @return byte array + */ + def serialize(string: String): Array[Byte] = { + string.getBytes() + } + + /** + * Convert Array[Byte] to String + * + * @param byte array + * @return deserialized string + */ + def deserialize(array: Array[Byte]): String = { + new String(array) + } +} diff --git a/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala new file mode 100644 index 0000000000000..0c3a3cc0043a6 --- /dev/null +++ b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.streaming.kinesis + +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.api.java.JavaReceiverInputDStream +import org.apache.spark.streaming.api.java.JavaStreamingContext +import org.apache.spark.streaming.dstream.ReceiverInputDStream +import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibDependencyException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException +import org.apache.spark.Logging +import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory +import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor +import scala.util.Random +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.util.ManualClock +import org.apache.spark.streaming.util.Clock +import org.apache.spark.streaming.util.SystemClock + +/** + * Facade to create the Scala-based or Java-based streams. + * Also, contains a reusable utility methods. + */ +object KinesisUtils extends Logging { + /** + * Create an InputDStream that pulls messages from a Kinesis stream. + * + * @param StreamingContext object + * @param app name + * @param stream name + * @param endpoint + * @param checkpoint interval (millis) for Kinesis checkpointing (not Spark checkpointing). + * See the Kinesis Spark Streaming documentation for more details on the different types of checkpoints. + * The default is TRIM_HORIZON to avoid potential data loss. However, this presents the risk of processing records more than once. 
+ * @param in the absence of Kinesis checkpoint info, this is the worker's initial starting position in the stream. + * The values are either the beginning of the stream per Kinesis' limit of 24 hours (InitialPositionInStream.TRIM_HORIZON) + * or the tip of the stream using InitialPositionInStream.LATEST. + * The default is StorageLevel.MEMORY_AND_DISK_2 which replicates in-memory and on-disk to 2 nodes total (primary and secondary) + * + * @return ReceiverInputDStream[Array[Byte]] + */ + def createStream( + ssc: StreamingContext, + app: String, + stream: String, + endpoint: String, + checkpointIntervalMillis: Long, + initialPositionInStream: InitialPositionInStream = InitialPositionInStream.TRIM_HORIZON, + storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_2): ReceiverInputDStream[Array[Byte]] = { + + ssc.receiverStream(new KinesisReceiver(app, stream, endpoint, checkpointIntervalMillis, initialPositionInStream, storageLevel)) + } + + /** + * Create a Java-friendly InputDStream that pulls messages from a Kinesis stream. + * + * @param JavaStreamingContext object + * @param app name + * @param stream name + * @param endpoint + * @param checkpoint interval (millis) for Kinesis checkpointing (not Spark checkpointing). + * See the Kinesis Spark Streaming documentation for more details on the different types of checkpoints. + * The default is TRIM_HORIZON to avoid potential data loss. However, this presents the risk of processing records more than once. + * @param in the absence of Kinesis checkpoint info, this is the worker's initial starting position in the stream. + * The values are either the beginning of the stream per Kinesis' limit of 24 hours (InitialPositionInStream.TRIM_HORIZON) + * or the tip of the stream using InitialPositionInStream.LATEST. + * The default is StorageLevel.MEMORY_AND_DISK_2 which replicates in-memory and on-disk to 2 nodes total (primary and secondary) + * + * @return JavaReceiverInputDStream[Array[Byte]] + */ + def createJavaStream( + jssc: JavaStreamingContext, + app: String, + stream: String, + endpoint: String, + checkpointIntervalMillis: Long, + initialPositionInStream: InitialPositionInStream = InitialPositionInStream.TRIM_HORIZON, + storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_2): JavaReceiverInputDStream[Array[Byte]] = { + + jssc.receiverStream(new KinesisReceiver(app, stream, endpoint, checkpointIntervalMillis, initialPositionInStream, storageLevel)) + } + + /** + * Create checkpoint state using the existing system clock + * @param checkpointIntervalMillis + */ + def createCheckpointState(checkpointIntervalMillis: Long): CheckpointState = { + new CheckpointState(checkpointIntervalMillis) + } + + /** + * Retry the given amount of times with a random backoff time (millis) less than the given maxBackOffMillis + * + * @param expression expression to evalute + * @param numRetriesLeft number of retries left + * @param maxBackOffMillis: max millis between retries + * + * @return Evaluation of the given expression + * @throws Unretryable exception, unexpected exception, + * or any exception that persists after numRetriesLeft reaches 0 + */ + @annotation.tailrec + def retry[T](expression: => T, numRetriesLeft: Int, maxBackOffMillis: Int): T = { + util.Try { expression } match { + /** If the function succeeded, evaluate to x. 
*/ + case util.Success(x) => x + /** If the function failed, either retry or throw the exception */ + case util.Failure(e) => e match { + /** Retry: Throttling or other Retryable exception has occurred */ + case _: ThrottlingException | _: KinesisClientLibDependencyException if numRetriesLeft > 1 => { + val backOffMillis = Random.nextInt(maxBackOffMillis) + Thread.sleep(backOffMillis) + logError(s"Retryable Exception: Random backOffMillis=${backOffMillis}", e) + retry(expression, numRetriesLeft - 1, maxBackOffMillis) + } + /** Throw: Shutdown has been requested by the Kinesis Client Library.*/ + case _: ShutdownException => { + logError(s"ShutdownException: Caught shutdown exception, skipping checkpoint.", e) + throw e + } + /** Throw: Non-retryable exception has occurred with the Kinesis Client Library */ + case _: InvalidStateException => { + logError(s"InvalidStateException: Cannot save checkpoint to the DynamoDB table used by the Amazon Kinesis Client Library. Table likely doesn't exist.", e) + throw e + } + /** Throw: Unexpected exception has occurred */ + case _ => { + logError(s"Unexpected, non-retryable exception.", e) + throw e + } + } + } + } +} diff --git a/extras/spark-kinesis-asl/src/test/resources/log4j.properties b/extras/spark-kinesis-asl/src/test/resources/log4j.properties new file mode 100644 index 0000000000000..f6bf583b740cd --- /dev/null +++ b/extras/spark-kinesis-asl/src/test/resources/log4j.properties @@ -0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Set everything to be logged to the file streaming/target/unit-tests.log +log4j.rootCategory=WARN, console + +# File appender +log4j.appender.file=org.apache.log4j.FileAppender +log4j.appender.file.append=false +log4j.appender.file.file=target/unit-tests.log +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n + +# Console appender +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.out +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Settings to quiet third party logs that are too verbose +log4j.logger.org.eclipse.jetty=WARN +log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR +log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO + +# Log all Kinesis Streaming messages +log4j.logger.org.apache.spark.examples.streaming=DEBUG +log4j.logger.org.apache.spark.streaming.Kinesis=DEBUG \ No newline at end of file diff --git a/extras/spark-kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/spark-kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala new file mode 100644 index 0000000000000..3d86a7a17fa12 --- /dev/null +++ b/extras/spark-kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.streaming.kinesis + +import java.nio.ByteBuffer +import java.nio.CharBuffer +import java.nio.charset.Charset +import scala.collection.JavaConversions.seqAsJavaList +import org.scalatest.BeforeAndAfter +import org.scalatest.FunSuite +import org.scalatest.Matchers +import org.scalatest.PrivateMethodTester +import org.scalatest.mock.EasyMockSugar +import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer +import com.amazonaws.services.kinesis.model.Record +import scala.collection.mutable.ArrayBuffer +import org.apache.spark.streaming.receiver.Receiver +import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason +import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibDependencyException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.util.ManualClock +import org.apache.spark.streaming.util.SystemClock +import org.apache.spark.streaming.util.Clock + +/** + * Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor + */ +class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter with EasyMockSugar { + val app = "TestKinesisReceiver" + val stream = "mySparkStream" + val endpoint = "endpoint-url" + val workerId = "dummyWorkerId" + val shardId = "dummyShardId" + + val record1 = new Record() + record1.setData(ByteBuffer.wrap("Spark In Action".getBytes())) + val record2 = new Record() + record2.setData(ByteBuffer.wrap("Learning Spark".getBytes())) + val batch = List[Record](record1, record2) + val expectedArrayBuffer = new ArrayBuffer[Array[Byte]]() += record1.getData().array() += record2.getData().array() + + var receiverMock: KinesisReceiver = _ + var checkpointerMock: IRecordProcessorCheckpointer = _ + var checkpointClockMock: ManualClock = _ + var checkpointStateMock: CheckpointState = _ + var currentClockMock: Clock = _ + + before { + receiverMock = mock[KinesisReceiver] + checkpointerMock = mock[IRecordProcessorCheckpointer] + checkpointClockMock = mock[ManualClock] + checkpointStateMock = mock[CheckpointState] + currentClockMock = mock[Clock] + } + + test("process records including store and checkpoint") { + val expectedCheckpointIntervalMillis = 10 + expecting { + receiverMock.isStopped().andReturn(false).once() + receiverMock.store(expectedArrayBuffer).once() + checkpointStateMock.shouldCheckpoint().andReturn(true).once() + checkpointerMock.checkpoint().once() + checkpointStateMock.advanceCheckpoint().once() + } + whenExecuting(receiverMock, checkpointerMock, checkpointStateMock) { + val recordProcessor = new KinesisRecordProcessor(receiverMock, workerId, checkpointStateMock) + recordProcessor.processRecords(batch, checkpointerMock) + } + } + + test("shouldn't store and checkpoint when receiver is stopped") { + expecting { + receiverMock.isStopped().andReturn(true).once() + } + whenExecuting(receiverMock, checkpointerMock, checkpointStateMock) { + val recordProcessor = new KinesisRecordProcessor(receiverMock, workerId, checkpointStateMock) + recordProcessor.processRecords(batch, checkpointerMock) + } + } + 
+ test("shouldn't checkpoint when exception occurs during store") { + expecting { + receiverMock.isStopped().andReturn(false).once() + receiverMock.store(expectedArrayBuffer).andThrow(new RuntimeException()).once() + } + whenExecuting(receiverMock, checkpointerMock, checkpointStateMock) { + intercept[RuntimeException] { + val recordProcessor = new KinesisRecordProcessor(receiverMock, workerId, checkpointStateMock) + recordProcessor.processRecords(batch, checkpointerMock) + } + } + } + + test("should set checkpoint time to currentTime + checkpoint interval upon instantiation") { + expecting { + currentClockMock.currentTime().andReturn(0).once() + } + whenExecuting(currentClockMock) { + val checkpointIntervalMillis = 10 + val checkpointState = new CheckpointState(checkpointIntervalMillis, currentClockMock) + assert(checkpointState.checkpointClock.currentTime() == checkpointIntervalMillis) + } + } + + test("should checkpoint if we have exceeded the checkpoint interval") { + expecting { + currentClockMock.currentTime().andReturn(0).once() + } + whenExecuting(currentClockMock) { + val checkpointState = new CheckpointState(Long.MinValue, currentClockMock) + assert(checkpointState.shouldCheckpoint()) + } + } + + test("shouldn't checkpoint if we have not exceeded the checkpoint interval") { + expecting { + currentClockMock.currentTime().andReturn(0).once() + } + whenExecuting(currentClockMock) { + val checkpointState = new CheckpointState(Long.MaxValue, currentClockMock) + assert(!checkpointState.shouldCheckpoint()) + } + } + + test("should add to time when advancing checkpoint") { + expecting { + currentClockMock.currentTime().andReturn(0).once() + } + whenExecuting(currentClockMock) { + val checkpointIntervalMillis = 10 + val checkpointState = new CheckpointState(checkpointIntervalMillis, currentClockMock) + assert(checkpointState.checkpointClock.currentTime() == checkpointIntervalMillis) + checkpointState.advanceCheckpoint() + assert(checkpointState.checkpointClock.currentTime() == (2 * checkpointIntervalMillis)) + } + } + + test("shutdown should checkpoint if the reason is TERMINATE") { + expecting { + checkpointerMock.checkpoint().once() + } + whenExecuting(checkpointerMock, checkpointStateMock) { + val recordProcessor = new KinesisRecordProcessor(receiverMock, workerId, checkpointStateMock) + val reason = ShutdownReason.TERMINATE + recordProcessor.shutdown(checkpointerMock, reason) + } + } + + test("shutdown should not checkpoint if the reason is something other than TERMINATE") { + expecting { + } + whenExecuting(checkpointerMock, checkpointStateMock) { + val recordProcessor = new KinesisRecordProcessor(receiverMock, workerId, checkpointStateMock) + recordProcessor.shutdown(checkpointerMock, ShutdownReason.ZOMBIE) + recordProcessor.shutdown(checkpointerMock, null) + } + } + + test("string record converter") { + val expectedString = "http://sparkinaction.com" + val expectedByteArray = expectedString.getBytes() + val stringRecordSerializer = new KinesisStringRecordSerializer() + + expectedByteArray should be(stringRecordSerializer.serialize(expectedString)) + + expectedString should be(stringRecordSerializer.deserialize(expectedByteArray)) + expectedString should be(stringRecordSerializer.deserialize(stringRecordSerializer.serialize(expectedString))) + } + + test("retry success on first attempt") { + val expectedIsStopped = false + expecting { + receiverMock.isStopped().andReturn(expectedIsStopped).once() + } + whenExecuting(receiverMock) { + val actualVal = 
KinesisUtils.retry(receiverMock.isStopped(), 2, 100) + assert(actualVal == expectedIsStopped) + } + } + + test("retry success on second attempt after a Kinesis throttling exception") { + val expectedIsStopped = false + expecting { + receiverMock.isStopped().andThrow(new ThrottlingException("error message")).andReturn(expectedIsStopped).once() + } + whenExecuting(receiverMock) { + val actualVal = KinesisUtils.retry(receiverMock.isStopped(), 2, 100) + assert(actualVal == expectedIsStopped) + } + } + + test("retry success on second attempt after a Kinesis dependency exception") { + val expectedIsStopped = false + expecting { + receiverMock.isStopped().andThrow(new KinesisClientLibDependencyException("error message")).andReturn(expectedIsStopped).once() + } + whenExecuting(receiverMock) { + val actualVal = KinesisUtils.retry(receiverMock.isStopped(), 2, 100) + assert(actualVal == expectedIsStopped) + } + } + + test("retry failed after a shutdown exception") { + expecting { + checkpointerMock.checkpoint().andThrow(new ShutdownException("error message")).once() + } + whenExecuting(checkpointerMock) { + intercept[ShutdownException] { + KinesisUtils.retry(checkpointerMock.checkpoint(), 2, 100) + } + } + } + + test("retry failed after an invalid state exception") { + expecting { + checkpointerMock.checkpoint().andThrow(new InvalidStateException("error message")).once() + } + whenExecuting(checkpointerMock) { + intercept[InvalidStateException] { + KinesisUtils.retry(checkpointerMock.checkpoint(), 2, 100) + } + } + } + + test("retry failed after unexpected exception") { + expecting { + checkpointerMock.checkpoint().andThrow(new RuntimeException("error message")).once() + } + whenExecuting(checkpointerMock) { + intercept[RuntimeException] { + KinesisUtils.retry(checkpointerMock.checkpoint(), 2, 100) + } + } + } + + test("retry failed after exhausing all retries") { + val expectedErrorMessage = "final try error message" + expecting { + checkpointerMock.checkpoint().andThrow(new ThrottlingException("error message")).andThrow(new ThrottlingException(expectedErrorMessage)).once() + } + whenExecuting(checkpointerMock) { + val exception = intercept[RuntimeException] { + KinesisUtils.retry(checkpointerMock.checkpoint(), 2, 100) + } + exception.getMessage().shouldBe(expectedErrorMessage) + } + } +} diff --git a/pom.xml b/pom.xml index 05f76d566e9d1..4dfdee12ec7f0 100644 --- a/pom.xml +++ b/pom.xml @@ -958,6 +958,14 @@ + + + spark-kinesis-asl + + extras/spark-kinesis-asl + + + java8-tests diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 599714233c18f..b2c9fd91e0e91 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -88,7 +88,7 @@ object SparkBuild extends Build { lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core) lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings) - .dependsOn(core, graphx, bagel, mllib, streaming, repl, sql) dependsOn(maybeYarn: _*) dependsOn(maybeHive: _*) dependsOn(maybeGanglia: _*) + .dependsOn(core, graphx, bagel, mllib, streaming, repl, sql) dependsOn(maybeYarn: _*) dependsOn(maybeHive: _*) dependsOn(maybeGanglia: _*) dependsOn(maybeKinesis: _*) lazy val assembleDepsTask = TaskKey[Unit]("assemble-deps") lazy val assembleDeps = assembleDepsTask := { @@ -135,6 +135,15 @@ object SparkBuild extends Build { val maybeGanglia: Seq[ClasspathDependency] = if (isGangliaEnabled) Seq(gangliaProj) else Seq() val maybeGangliaRef: Seq[ProjectReference] = if (isGangliaEnabled) 
Seq(gangliaProj) else Seq() + // Include Kinesis integration if the user has enabled Kinesis + // This is isolated from the normal build due to ASL-licensed code in the library + lazy val isKinesisEnabled = Properties.envOrNone("SPARK_KINESIS_ASL").isDefined + lazy val kinesisProj = Project("spark-kinesis-asl", file("extras/spark-kinesis-asl"), settings = kinesisSettings) + .dependsOn(streaming % "compile->compile;test->test") + val maybeKinesis: Seq[ClasspathDependency] = if (isKinesisEnabled) Seq(kinesisProj) else Seq() + val maybeKinesisRef: Seq[ProjectReference] = if (isKinesisEnabled) Seq(kinesisProj) else Seq() + + // Include the Java 8 project if the JVM version is 8+ lazy val javaVersion = System.getProperty("java.specification.version") lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble @@ -171,7 +180,7 @@ object SparkBuild extends Build { .dependsOn(core, mllib, graphx, bagel, streaming, hive) dependsOn(allExternal: _*) // Everything except assembly, hive, tools, java8Tests and examples belong to packageProjects - lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx, catalyst, sql) ++ maybeYarnRef ++ maybeHiveRef ++ maybeGangliaRef + lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx, catalyst, sql) ++ maybeYarnRef ++ maybeHiveRef ++ maybeGangliaRef ++ maybeKinesisRef lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests @@ -588,6 +597,14 @@ object SparkBuild extends Build { libraryDependencies += "com.codahale.metrics" % "metrics-ganglia" % "3.0.0" ) + def kinesisSettings = streamingSettings ++ Seq( + name := "spark-kinesis-asl", + libraryDependencies ++= Seq( + "com.amazonaws" % "amazon-kinesis-client" % "1.1.0", + "com.amazonaws" % "aws-java-sdk" % "1.8.3" + ) + ) + def java8TestsSettings = sharedSettings ++ Seq( name := "java8-tests", javacOptions := Seq("-target", "1.8", "-source", "1.8"), From cd68c0d7bb0c1ef38e7c92d0cd6eb4a7ccf2ce27 Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Fri, 18 Jul 2014 20:16:13 -0700 Subject: [PATCH 02/12] fixed typos and backward compatibility --- docs/streaming-programming-guide.md | 2 +- .../spark/streaming/kinesis/KinesisRecordProcessor.scala | 3 +-- project/SparkBuild.scala | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index c91a23b5c0c94..8f9b7c1fa0f0a 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -518,7 +518,7 @@ depending on the checkpoint frequency.
  • Failed or latent KinesisReceivers will be detected and automatically shut down or load-balanced by the KCL.
  • If a failure occurs, explicitly shut down the worker where possible.
  • -Example KinesisWordCount (and JavaKiensisWordCount) notes: +Example KinesisWordCount (and JavaKinesisWordCount) notes:
  • These examples automatically determine the number of threads to run locally based on the number of shards for the stream.
  • These examples automatically determine the number of KinesisReceivers/InputDStreams to create based on the number of shards for the stream, as sketched below.
  • These examples use InitialPositionInStream.LATEST (tip of stream) vs. InitialPositionInStream.TRIM_HORIZON (back 24 hours) to simplify reasoning about the examples.
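A minimal sketch of the pattern these notes describe, assuming the simplified `KinesisUtils.createStream(ssc, appName, streamName, endpointUrl, checkpointIntervalMillis, initialPosition)` overload introduced later in this patch series; the app name, stream name, endpoint, and shard count below are placeholders (a real application would look the shard count up via the Kinesis client rather than hard-coding it):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Milliseconds, StreamingContext}
import org.apache.spark.streaming.kinesis.KinesisUtils
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream

object KinesisUnionSketch {
  def main(args: Array[String]) {
    val batchInterval = Milliseconds(2000)
    val ssc = new StreamingContext(
      new SparkConf().setAppName("KinesisUnionSketch"), batchInterval)

    // Placeholder values; a real app would query the shard count with
    // AmazonKinesisClient.describeStream() and pass stream/endpoint as args.
    val numShards = 2
    val kinesisCheckpointIntervalMillis = batchInterval.milliseconds

    // One receiver/DStream per shard, as the notes above describe ...
    val shardStreams = (0 until numShards).map { _ =>
      KinesisUtils.createStream(ssc, "KinesisWordCount", "mySparkStream",
        "https://kinesis.us-east-1.amazonaws.com", kinesisCheckpointIntervalMillis,
        InitialPositionInStream.LATEST)
    }

    // ... then union them into a single DStream of raw Array[Byte] records.
    val unionStream = ssc.union(shardStreams)
    unionStream.flatMap(bytes => new String(bytes).split(" ")).print()

    ssc.start()
    ssc.awaitTermination()
  }
}
```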
  • diff --git a/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala index 8dd24501fe381..c9e8ecd2ebb14 100644 --- a/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala +++ b/extras/spark-kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -36,8 +36,7 @@ import org.apache.spark.streaming.util.Clock * * @param Kinesis receiver * @param workerId for logging purposes - * @param checkpoint utils - * @param Kinesis checkpoint interval (millis) + * @param checkpoint state */ private[streaming] class KinesisRecordProcessor( receiver: KinesisReceiver, diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4f4e2d11f1c00..097da182e2902 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -65,7 +65,7 @@ object SparkBuild extends PomBuild { } if (Properties.envOrNone("SPARK_KINESIS_ASL").isDefined) { println("NOTE: SPARK_KINESIS_ASL is deprecated, please use -Pspark-kinesis-asl flag.") - profiles ++= Seq("spark-ganglia-lgpl") + profiles ++= Seq("spark-kinesis-asl") } if (Properties.envOrNone("SPARK_HIVE").isDefined) { println("NOTE: SPARK_HIVE is deprecated, please use -Phive flag.") From 828f8aeb1081cf7ad9e5386e1cce933ece9c3d62 Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Mon, 21 Jul 2014 22:20:42 -0700 Subject: [PATCH 03/12] more cleanup --- .../sbt_app_core/src/main/scala/SparkApp.scala | 4 ++-- extras/spark-kinesis-asl/pom.xml | 8 ++++++-- project/SparkBuild.scala | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala index 225d82a6c4876..e80c6bb614816 100644 --- a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala +++ b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala @@ -47,14 +47,14 @@ object SimpleApp { System.exit(-1) } if (foundGanglia) { - println("Ganglia sink was loaded via spark-core") + println("Ganglia sink was loaded via spark-ganglia-lgpl") System.exit(-1) } // Remove kinesis from default build due to ASL license issue val foundKinesis = Try(Class.forName("org.apache.spark.streaming.kinesis.KinesisReceiver")).isSuccess if (foundKinesis) { - println("Kinesis was loaded via spark-core") + println("Kinesis was loaded via spark-kinesis-asl") System.exit(-1) } } diff --git a/extras/spark-kinesis-asl/pom.xml b/extras/spark-kinesis-asl/pom.xml index 1b4101194d42f..6e2fdc9f13690 100644 --- a/extras/spark-kinesis-asl/pom.xml +++ b/extras/spark-kinesis-asl/pom.xml @@ -24,14 +24,18 @@ ../../pom.xml - + org.apache.spark spark-kinesis-asl_2.10 jar Spark Kinesis Integration - spark-kinesis-asl + kinesis-asl diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 097da182e2902..a2e2f54745fed 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -36,7 +36,7 @@ object BuildCommons { "streaming-zeromq").map(ProjectRef(buildLocation, _)) val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, java8Tests, sparkGangliaLgpl, sparkKinesisAsl) = - Seq("yarn", "yarn-stable", "yarn-alpha", "java8-tests", "ganglia-lgpl", "spark-kinesis-asl") + Seq("yarn", "yarn-stable", "yarn-alpha", "java8-tests", "ganglia-lgpl", "kinesis-asl") .map(ProjectRef(buildLocation, _)) val assemblyProjects@Seq(assembly, examples) = Seq("assembly", 
"examples") @@ -60,7 +60,7 @@ object SparkBuild extends PomBuild { var isAlphaYarn = false var profiles: mutable.Seq[String] = mutable.Seq.empty if (Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined) { - println("NOTE: SPARK_GANGLIA_LGPL is deprecated, please use -Pganglia-lgpl flag.") + println("NOTE: SPARK_GANGLIA_LGPL is deprecated, please use -Pspark-ganglia-lgpl flag.") profiles ++= Seq("spark-ganglia-lgpl") } if (Properties.envOrNone("SPARK_KINESIS_ASL").isDefined) { From 338997e6e750c206bfb50a654b725be5f33beb07 Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Tue, 22 Jul 2014 08:54:35 -0700 Subject: [PATCH 04/12] improve build docs for kinesis --- docs/streaming-programming-guide.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index 8f9b7c1fa0f0a..75d320fae4620 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -472,10 +472,10 @@ Furthermore, you can also implement your own custom receiver for your sources. S Build notes:
  • Spark supports a Kinesis Streaming Receiver which is not included in the default build due to licensing restrictions.
  • _**Note that by embedding this library you will include [ASL](https://aws.amazon.com/asl/)-licensed code in your Spark package**_.
  • -
  • For sbt users, set the `SPARK_KINESIS_ASL` environment variable before building.
  • -
  • For Maven users, enable the `-Pspark-kinesis-asl` profile.
  • -
  • User applications will need to link to the `spark-kinesis-asl` artifact.
  • The Spark Kinesis Streaming Receiver source code, examples, tests, and artifacts live in $SPARK_HOME/extras/spark-kinesis-asl.
  • +
  • Both sbt and Maven builds must enable the `-Pspark-kinesis-asl` profile.
  • +
  • To build the examples JAR, you must run the Maven build with the `-Pspark-kinesis-asl` profile.
  • +
  • Applications will need to link to the `spark-kinesis-asl` artifact, as sketched in the build excerpt below.
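For illustration, a hedged sketch of what an application's sbt build definition might look like when linking against the artifact named above. The artifact id and version are placeholders taken from these notes, not authoritative coordinates (a later commit in this series renames the artifact to `spark-streaming-kinesis-asl`):

```scala
// Hypothetical application build.sbt excerpt; coordinates are assumptions.
name := "my-kinesis-app"

scalaVersion := "2.10.4"

libraryDependencies ++= Seq(
  // Core streaming dependency, typically provided by the Spark runtime.
  "org.apache.spark" %% "spark-streaming" % "1.1.0-SNAPSHOT" % "provided",
  // ASL-licensed Kinesis support; only available if Spark was built and
  // published with the -Pspark-kinesis-asl profile enabled.
  "org.apache.spark" %% "spark-kinesis-asl" % "1.1.0-SNAPSHOT"
)
```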
  • Deployment and runtime notes:
  • Each shard of a stream is processed by one or more KinesisReceivers managed by the Kinesis Client Library (KCL) Worker.
  • From 6c395619dde93a9b8e9137b1150de4ae5129cf4b Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Wed, 23 Jul 2014 20:55:55 -0700 Subject: [PATCH 05/12] parameterized the versions of the aws java sdk and kinesis client --- extras/spark-kinesis-asl/pom.xml | 4 ++-- pom.xml | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/extras/spark-kinesis-asl/pom.xml b/extras/spark-kinesis-asl/pom.xml index 6e2fdc9f13690..adb63d5464754 100644 --- a/extras/spark-kinesis-asl/pom.xml +++ b/extras/spark-kinesis-asl/pom.xml @@ -52,12 +52,12 @@ com.amazonaws amazon-kinesis-client - 1.1.0 + ${aws.kinesis.client.version} com.amazonaws aws-java-sdk - 1.8.3 + ${aws.java.sdk.version} org.scalatest diff --git a/pom.xml b/pom.xml index 8619e4fa43b6e..0dece16192017 100644 --- a/pom.xml +++ b/pom.xml @@ -132,6 +132,8 @@ 3.0.0 1.7.6 0.7.1 + 1.8.3 + 1.1.0 64m 512m From 912640cb344c77102e4ca4d884b8b0d0206ed627 Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Wed, 30 Jul 2014 18:03:27 -0700 Subject: [PATCH 06/12] changed the foundKinesis class to be a publically-avail class --- dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala index 07884afaf169e..025f71a1ce45a 100644 --- a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala +++ b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala @@ -52,7 +52,7 @@ object SimpleApp { } // Remove kinesis from default build due to ASL license issue - val foundKinesis = Try(Class.forName("org.apache.spark.streaming.kinesis.KinesisReceiver")).isSuccess + val foundKinesis = Try(Class.forName("org.apache.spark.streaming.kinesis.KinesisUtils")).isSuccess if (foundKinesis) { println("Kinesis was loaded via kinesis-asl") System.exit(-1) From d17ca6d6a36ddf0a3030eacae0eace3fdd758cc5 Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Thu, 31 Jul 2014 10:00:09 -0700 Subject: [PATCH 07/12] per TD's feedback: updated docs, simplified the KinesisUtils api --- .../streaming/JavaKinesisWordCount.java | 4 +- .../examples/streaming/KinesisWordCount.scala | 6 +- .../streaming/kinesis/KinesisReceiver.scala | 26 +++---- .../streaming/kinesis/KinesisUtils.scala | 69 +++++++------------ 4 files changed, 38 insertions(+), 67 deletions(-) diff --git a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java b/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java index be699a2b8f86e..8543c07aed141 100644 --- a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java +++ b/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java @@ -165,14 +165,14 @@ public static void main(String[] args) { /** Create the same number of Kinesis Receivers/DStreams as stream shards, then union them all */ JavaDStream allStreams = KinesisUtils .createStream(jssc, appName, stream, endpoint, checkpointInterval.milliseconds(), - InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2()); + InitialPositionInStream.LATEST); /** Set the checkpoint interval */ allStreams.checkpoint(checkpointInterval); for (int i = 1; i < numStreams; i++) { /** Create a new Receiver/DStream for each stream shard */ JavaDStream dStream = KinesisUtils .createStream(jssc, appName, stream, endpoint, checkpointInterval.milliseconds(), - InitialPositionInStream.LATEST, 
StorageLevel.MEMORY_AND_DISK_2()); + InitialPositionInStream.LATEST); /** Set the Spark checkpoint interval */ dStream.checkpoint(checkpointInterval); diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala index d0e6cdb75cd26..bb036f4d1741e 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala @@ -159,15 +159,13 @@ object KinesisWordCount extends Logging { * them all. */ var allStreams: DStream[Array[Byte]] = KinesisUtils.createStream(ssc, appName, stream, - endpoint, checkpointInterval.milliseconds, InitialPositionInStream.LATEST, - StorageLevel.MEMORY_AND_DISK_2) + endpoint, checkpointInterval.milliseconds, InitialPositionInStream.LATEST) /** Set the checkpoint interval */ allStreams.checkpoint(checkpointInterval) for (i <- 1 until numStreams) { /** Create a new Receiver/DStream for each stream shard */ val dStream = KinesisUtils.createStream(ssc, appName, stream, endpoint, - checkpointInterval.milliseconds, - InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2) + checkpointInterval.milliseconds, InitialPositionInStream.LATEST) /** Set the Spark checkpoint interval */ dStream.checkpoint(checkpointInterval) diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala index 3f0828431fe15..d6e4b7996877c 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala @@ -41,29 +41,23 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker * Instances of this class will get shipped to the Spark Streaming Workers * to run within a Spark Executor. * - * @param appName Kinesis Application Name. Kinesis apps are mapped to Kinesis streams - * by the Kinesis Client Library. If you change the app name or stream name, - * the KCL will throw errors. + * @param appName unique name for your Kinesis app. Multiple instances of the app pull from + * the same stream. The Kinesis Client Library coordinates all load-balancing and + * failure-recovery. * @param stream Kinesis stream name - * @param endpoint url of Kinesis service - * @param checkpointIntervalMillis for Kinesis checkpointing (not Spark checkpointing). - * See the Kinesis Spark Streaming documentation for more details on the different types - * of checkpoints. - * @param initialPositionInStream in the absence of Kinesis checkpoint info, this is the worker's initial - * starting position in the stream. - * The values are either the beginning of the stream per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or the tip of the stream - * (InitialPositionInStream.LATEST). - * @param persistence strategy for RDDs and DStreams. + * @param endpoint url of Kinesis service (ie. https://kinesis.us-east-1.amazonaws.com) + * Available endpoints: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region + * @param checkpointIntervalMillis interval (millis) for Kinesis checkpointing + * @param initialPositionInStream in the absence of a Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. 
*/ private[kinesis] class KinesisReceiver( appName: String, stream: String, endpoint: String, checkpointIntervalMillis: Long, - initialPositionInStream: InitialPositionInStream, - storageLevel: StorageLevel) - extends Receiver[Array[Byte]](storageLevel) with Logging { receiver => + initialPositionInStream: InitialPositionInStream) + extends Receiver[Array[Byte]](StorageLevel.MEMORY_AND_DISK_2) with Logging { receiver => /** * The following vars are built in the onStart() method which executes in the Spark Worker after diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala index 8a113bb46ddd9..f3b60f1c49686 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala @@ -28,8 +28,7 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionIn /** - * Facade to create the Scala-based or Java-based streams. - * Also, contains a reusable utility methods. + * Helper class to create Amazon Kinesis Input Stream * :: Experimental :: */ @Experimental @@ -37,25 +36,16 @@ object KinesisUtils extends Logging { /** * Create an InputDStream that pulls messages from a Kinesis stream. * - * @param StreamingContext object - * @param appName Kinesis Application Name. Kinesis Apps are mapped to Kinesis Streams - * by the Kinesis Client Library. If you change the App name or Stream name, - * the KCL will throw errors. - * @param stream Kinesis Stream Name - * @param endpoint url of Kinesis service - * @param checkpoint interval (millis) for Kinesis checkpointing (not Spark checkpointing). - * See the Kinesis Spark Streaming documentation for more details on the different types - * of checkpoints. - * @param initialPositionInStream in the absence of Kinesis checkpoint info, this is the + * @param ssc StreamingContext + * @param appName unique name for your Kinesis app. Multiple instances of the app pull from + * the same stream. The Kinesis Client Library coordinates all load-balancing and + * failure-recovery. + * @param stream Kinesis stream name + * @param endpoint url of Kinesis service (ie. https://kinesis.us-east-1.amazonaws.com) + * Available endpoints: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region + * @param checkpointIntervalMillis interval (millis) for Kinesis checkpointing + * @param initialPositionInStream in the absence of a Kinesis checkpoint info, this is the * worker's initial starting position in the stream. - * The values are either the beginning of the stream per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or the tip of the stream - * (InitialPositionInStream.LATEST). - * The default is TRIM_HORIZON to avoid potential data loss. However, this presents the risk - * of processing records more than once. 
- * @param storageLevel The default is StorageLevel.MEMORY_AND_DISK_2 which replicates in-memory - * and on-disk to 2 nodes total (primary and secondary) - * * @return ReceiverInputDStream[Array[Byte]] */ def createStream( @@ -64,34 +54,24 @@ object KinesisUtils extends Logging { stream: String, endpoint: String, checkpointIntervalMillis: Long, - initialPositionInStream: InitialPositionInStream, - storageLevel: StorageLevel): ReceiverInputDStream[Array[Byte]] = { + initialPositionInStream: InitialPositionInStream): ReceiverInputDStream[Array[Byte]] = { ssc.receiverStream(new KinesisReceiver(appName, stream, endpoint, checkpointIntervalMillis, - initialPositionInStream, storageLevel)) + initialPositionInStream )) } /** * Create a Java-friendly InputDStream that pulls messages from a Kinesis stream. * - * @param JavaStreamingContext object - * @param appName Kinesis Application Name. Kinesis Apps are mapped to Kinesis Streams - * by the Kinesis Client Library. If you change the App name or Stream name, - * the KCL will throw errors. - * @param stream Kinesis Stream Name - * @param endpoint url of Kinesis service - * @param checkpoint interval (millis) for Kinesis checkpointing (not Spark checkpointing). - * See the Kinesis Spark Streaming documentation for more details on the different types - * of checkpoints. - * @param initialPositionInStream in the absence of Kinesis checkpoint info, this is the + * @param jssc Java StreamingContext object + * @param appName unique name for your Kinesis app. Multiple instances of the app pull from + * the same stream. The Kinesis Client Library coordinates all load-balancing and + * failure-recovery. + * @param stream Kinesis stream name + * @param endpoint url of Kinesis service (ie. https://kinesis.us-east-1.amazonaws.com) + * Available endpoints: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region + * @param checkpointIntervalMillis interval (millis) for Kinesis checkpointing + * @param initialPositionInStream in the absence of a Kinesis checkpoint info, this is the * worker's initial starting position in the stream. - * The values are either the beginning of the stream per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or the tip of the stream - * (InitialPositionInStream.LATEST). - * The default is TRIM_HORIZON to avoid potential data loss. However, this presents the risk - * of processing records more than once. 
- * @param storageLevel The default is StorageLevel.MEMORY_AND_DISK_2 which replicates in-memory - * and on-disk to 2 nodes total (primary and secondary) - * * @return JavaReceiverInputDStream[Array[Byte]] */ def createStream( @@ -99,10 +79,9 @@ object KinesisUtils extends Logging { appName: String, stream: String, endpoint: String, - checkpointIntervalMillis: Long, - initialPositionInStream: InitialPositionInStream, - storageLevel: StorageLevel): JavaReceiverInputDStream[Array[Byte]] = { + checkpointIntervalMillis: Long, + initialPositionInStream: InitialPositionInStream): JavaReceiverInputDStream[Array[Byte]] = { jssc.receiverStream(new KinesisReceiver(appName, stream, endpoint, checkpointIntervalMillis, - initialPositionInStream, storageLevel)) + initialPositionInStream)) } } From bf614e9ed870a3c23670d3783d574b1e4280bd81 Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Thu, 31 Jul 2014 10:33:20 -0700 Subject: [PATCH 08/12] per matei's feedback: moved the kinesis examples into the examples/ dir --- examples/pom.xml | 5 ++ .../streaming/JavaKinesisWordCount.java | 2 +- .../examples/streaming/KinesisWordCount.scala | 4 +- extras/kinesis-asl/bin/run-kinesis-example | 60 ------------- .../kinesis-asl/bin/run-kinesis-example.cmd | 90 ------------------- .../kinesis/KinesisRecordProcessor.scala | 2 +- .../streaming/kinesis/KinesisUtils.scala | 2 +- 7 files changed, 10 insertions(+), 155 deletions(-) rename {extras/kinesis-asl => examples}/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java (99%) rename {extras/kinesis-asl => examples}/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala (99%) delete mode 100755 extras/kinesis-asl/bin/run-kinesis-example delete mode 100755 extras/kinesis-asl/bin/run-kinesis-example.cmd diff --git a/examples/pom.xml b/examples/pom.xml index c4ed0f5a6a02b..d87ac68238eb9 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -96,6 +96,11 @@ spark-streaming-mqtt_${scala.binary.version} ${project.version} + + org.apache.spark + kinesis-asl_${scala.binary.version} + ${project.version} + org.apache.hbase hbase diff --git a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java similarity index 99% rename from extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java rename to examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java index 8543c07aed141..f13d3c9acce8b 100644 --- a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java @@ -73,7 +73,7 @@ * Example: * $ export AWS_ACCESS_KEY_ID= * $ export AWS_SECRET_KEY= - * $ $SPARK_HOME/extras/kinesis-asl/bin/run-kinesis-example \ + * $ $SPARK_HOME/bin/run-example \ * org.apache.spark.examples.streaming.JavaKinesisWordCount mySparkStream \ * https://kinesis.us-east-1.amazonaws.com * diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala similarity index 99% rename from extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala rename to examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala index bb036f4d1741e..50c3889d277fa 100644 --- 
a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala @@ -68,7 +68,7 @@ import com.amazonaws.services.kinesis.model.PutRecordRequest * Example: * $ export AWS_ACCESS_KEY_ID= * $ export AWS_SECRET_KEY= - * $ $SPARK_HOME/extras/kinesis-asl/bin/run-kinesis-example \ + * $ $SPARK_HOME/bin/run-example \ * org.apache.spark.examples.streaming.KinesisWordCount mySparkStream \ * https://kinesis.us-east-1.amazonaws.com * @@ -260,7 +260,7 @@ object KinesisWordCount extends Logging { * Example: * $ export AWS_ACCESS_KEY_ID= * $ export AWS_SECRET_KEY= - * $ $SPARK_HOME/extras/kinesis-asl/bin/run-kinesis-example \ + * $ $SPARK_HOME/bin/run-example \ * org.apache.spark.examples.streaming.KinesisWordCountProducer mySparkStream \ * https://kinesis.us-east-1.amazonaws.com 10 5 */ diff --git a/extras/kinesis-asl/bin/run-kinesis-example b/extras/kinesis-asl/bin/run-kinesis-example deleted file mode 100755 index 6cf01fbe773a4..0000000000000 --- a/extras/kinesis-asl/bin/run-kinesis-example +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -SCALA_VERSION=2.10 - -FWDIR="$(cd `dirname $0`/../../../; pwd)" -export SPARK_HOME="$FWDIR" -KINESIS_EXAMPLES_DIR="$FWDIR"/extras/kinesis-asl - -if [ -n "$1" ]; then - EXAMPLE_CLASS="$1" - shift -else - echo "Usage: $SPARK_HOME/extras/kinesis-asl/bin/run-kinesis-example [example-args]" 1>&2 - echo " - set MASTER=XX to use a specific master" 1>&2 - echo " - can use abbreviated example class name (e.g. KinesisWordCount, JavaKinesisWordCount)" 1>&2 - echo " - must set AWS_ACCESS_KEY_ID and AWS_SECRET_KEY env variables" 1>&2 - exit 1 -fi - -export GLOBIGNORE="*-javadoc.jar:*-sources.jar" -if [ -f "$FWDIR/RELEASE" ]; then - export SPARK_KINESIS_EXAMPLES_JAR=`ls "$FWDIR"/lib/kinesis-asl*.jar` -elif [ -e "$KINESIS_EXAMPLES_DIR"/target/kinesis-asl_$SCALA_VERSION-*.jar ]; then - export SPARK_KINESIS_EXAMPLES_JAR=`ls "$KINESIS_EXAMPLES_DIR"/target/kinesis-asl_$SCALA_VERSION-*.jar` -fi - -if [[ -z $SPARK_KINESIS_EXAMPLES_JAR ]]; then - echo "Failed to find Spark Kinesis examples assembly in "$FWDIR"/lib or "$KINESIS_EXAMPLES_DIR"/target" 1>&2 - echo "You need to build Spark with maven using 'mvn -Pkinesis-asl package' before running this program." 1>&2 - exit 1 -fi - -EXAMPLE_MASTER=${MASTER:-"local[*]"} - -if [[ ! 
$EXAMPLE_CLASS == org.apache.spark.examples.streaming* ]]; then - EXAMPLE_CLASS="org.apache.spark.examples.streaming.$EXAMPLE_CLASS" -fi - -"$FWDIR"/bin/spark-submit \ - --master $EXAMPLE_MASTER \ - --class $EXAMPLE_CLASS \ - "$SPARK_KINESIS_EXAMPLES_JAR" \ - $@ diff --git a/extras/kinesis-asl/bin/run-kinesis-example.cmd b/extras/kinesis-asl/bin/run-kinesis-example.cmd deleted file mode 100755 index 0980c78391d49..0000000000000 --- a/extras/kinesis-asl/bin/run-kinesis-example.cmd +++ /dev/null @@ -1,90 +0,0 @@ -@echo off - -rem -rem Licensed to the Apache Software Foundation (ASF) under one or more -rem contributor license agreements. See the NOTICE file distributed with -rem this work for additional information regarding copyright ownership. -rem The ASF licenses this file to You under the Apache License, Version 2.0 -rem (the "License"); you may not use this file except in compliance with -rem the License. You may obtain a copy of the License at -rem -rem http://www.apache.org/licenses/LICENSE-2.0 -rem -rem Unless required by applicable law or agreed to in writing, software -rem distributed under the License is distributed on an "AS IS" BASIS, -rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -rem See the License for the specific language governing permissions and -rem limitations under the License. -rem - -set SCALA_VERSION=2.10 - -rem Figure out where the Spark framework is installed -set FWDIR=%~dp0..\..\..\ - -rem Export this as SPARK_HOME -set SPARK_HOME=%FWDIR% - -rem Load environment variables from conf\spark-env.cmd, if it exists -if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd" - -rem Test that an argument was given -if not "x%1"=="x" goto arg_given - echo Usage: SPARK_HOME/extras/kinesis-asl/bin run-kinesis-example ^ [example-args] - echo - set MASTER=XX to use a specific master - echo - can use abbreviated example class name (e.g. KinesisWordCount, JavaKinesisWordCount) - echo " - must set AWS_ACCESS_KEY_ID and AWS_SECRET_KEY env variables" 1>&2 - - goto exit -:arg_given - -set KINESIS_EXAMPLES_DIR=%FWDIR%extras\kinesis-asl - -rem Figure out the JAR file that our examples were packaged into. -set SPARK_KINESIS_EXAMPLES_JAR= -if exist "%FWDIR%RELEASE" ( - for %%d in ("%FWDIR%lib\kinesis-asl*.jar") do ( - set SPARK_KINESIS_EXAMPLES_JAR=%%d - ) -) else ( - for %%d in ("%KINESIS_EXAMPLES_DIR%\target\kinesis-asl*.jar") do ( - set SPARK_KINESIS_EXAMPLES_JAR=%%d - ) -) -if "x%SPARK_KINESIS_EXAMPLES_JAR%"=="x" ( - echo Failed to find Spark Kinesis examples assembly JAR. - echo You need to build Spark with maven using 'mvn -Pkinesis-asl package' before running this program. - goto exit -) - -rem Set master from MASTER environment variable if given -if "x%MASTER%"=="x" ( - set EXAMPLE_MASTER=local[*] -) else ( - set EXAMPLE_MASTER=%MASTER% -) - -rem If the EXAMPLE_CLASS does not start with org.apache.spark.examples.streaming, add that -set EXAMPLE_CLASS=%1 -set PREFIX=%EXAMPLE_CLASS:~0,25% -if not %PREFIX%==org.apache.spark.examples.streaming ( - set EXAMPLE_CLASS=org.apache.spark.examples.streaming.%EXAMPLE_CLASS% -) - -rem Get the tail of the argument list, to skip the first one. This is surprisingly -rem complicated on Windows. 
-set "ARGS=" -:top -shift -if "%~1" neq "" ( - set ARGS=%ARGS% "%~1" - goto :top -) -if defined ARGS set ARGS=%ARGS:~1% - -call "%FWDIR%bin\spark-submit.cmd" ^ - --master %EXAMPLE_MASTER% ^ - --class %EXAMPLE_CLASS% ^ - "%SPARK_KINESIS_EXAMPLES_JAR%" %ARGS% - -:exit diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala index 1c665cf9fd0d3..055e7297706ae 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -36,7 +36,7 @@ import com.amazonaws.services.kinesis.model.Record * @param Kinesis receiver * @param workerId for logging purposes * @param checkpointState represents the checkpoint state including the next time a - * checkpoint is needed. it's injected here for mocking purposes. + * checkpoint is needed. it's injected here for mocking purposes. */ private[kinesis] class KinesisRecordProcessor( receiver: KinesisReceiver, diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala index f3b60f1c49686..2b6b833457e35 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala @@ -56,7 +56,7 @@ object KinesisUtils extends Logging { checkpointIntervalMillis: Long, initialPositionInStream: InitialPositionInStream): ReceiverInputDStream[Array[Byte]] = { ssc.receiverStream(new KinesisReceiver(appName, stream, endpoint, checkpointIntervalMillis, - initialPositionInStream )) + initialPositionInStream)) } /** From 74e5c7c3ce99f5cd30d269d62aca31d2b275288c Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Fri, 1 Aug 2014 14:14:42 -0700 Subject: [PATCH 09/12] updated per TD's feedback. 
simplified examples, updated docs --- assembly/pom.xml | 2 +- bin/run-example | 4 +- .../src/main/scala/SparkApp.scala | 4 +- dev/audit-release/sbt_app_kinesis/build.sbt | 2 +- docs/streaming-kinesis.md | 1 + examples/pom.xml | 2 +- .../streaming/JavaKinesisWordCount.java | 294 -------------- .../streaming/JavaKinesisWordCountASL.java | 187 +++++++++ .../examples/streaming/KinesisWordCount.scala | 369 ------------------ .../streaming/KinesisWordCountASL.scala | 235 +++++++++++ extras/kinesis-asl/pom.xml | 3 +- ...ate.scala => KinesisCheckpointState.scala} | 15 +- .../streaming/kinesis/KinesisReceiver.scala | 45 ++- .../kinesis/KinesisRecordProcessor.scala | 87 ++++- .../kinesis/KinesisRecordProcessorUtils.scala | 79 ---- .../kinesis/KinesisRecordSerializer.scala | 39 -- .../KinesisStringRecordSerializer.scala | 44 --- .../streaming/kinesis/KinesisUtils.scala | 78 ++-- .../kinesis/JavaKinesisStreamSuite.java | 41 ++ .../src/test/resources/log4j.properties | 1 - .../kinesis/KinesisReceiverSuite.scala | 67 ++-- make-distribution.sh | 2 - 22 files changed, 658 insertions(+), 943 deletions(-) delete mode 100644 examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java create mode 100644 examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java delete mode 100644 examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala rename extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/{CheckpointState.scala => KinesisCheckpointState.scala} (82%) delete mode 100644 extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessorUtils.scala delete mode 100644 extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordSerializer.scala delete mode 100644 extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisStringRecordSerializer.scala create mode 100644 extras/kinesis-asl/src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisStreamSuite.java diff --git a/assembly/pom.xml b/assembly/pom.xml index 824ef383d2e47..76099b074c7ed 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -190,7 +190,7 @@ org.apache.spark - kinesis-asl_${scala.binary.version} + spark-streaming-kinesis-asl_${scala.binary.version} ${project.version} diff --git a/bin/run-example b/bin/run-example index 942706d733122..65d20738260bf 100755 --- a/bin/run-example +++ b/bin/run-example @@ -29,7 +29,9 @@ if [ -n "$1" ]; then else echo "Usage: ./bin/run-example [example-args]" 1>&2 echo " - set MASTER=XX to use a specific master" 1>&2 - echo " - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)" 1>&2 + echo " - can use abbreviated example class name relative to com.apache.spark.examples" 1>&2 + echo " (e.g. 
SparkPi, mllib.LinearRegression, streaming.KinesisWordCountASL)" 1>&2 + echo " - to run the Kinesis Spark Streaming example, make sure you build with -Pkinesis-asl" 1>&2 exit 1 fi diff --git a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala index 025f71a1ce45a..fc03fec9866a6 100644 --- a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala +++ b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala @@ -47,14 +47,14 @@ object SimpleApp { System.exit(-1) } if (foundGanglia) { - println("Ganglia sink was loaded via spark-ganglia-lgpl") + println("Ganglia sink was loaded via spark-core") System.exit(-1) } // Remove kinesis from default build due to ASL license issue val foundKinesis = Try(Class.forName("org.apache.spark.streaming.kinesis.KinesisUtils")).isSuccess if (foundKinesis) { - println("Kinesis was loaded via kinesis-asl") + println("Kinesis was loaded via spark-core") System.exit(-1) } } diff --git a/dev/audit-release/sbt_app_kinesis/build.sbt b/dev/audit-release/sbt_app_kinesis/build.sbt index 9d821b9a09fbf..5dfd16c185f61 100644 --- a/dev/audit-release/sbt_app_kinesis/build.sbt +++ b/dev/audit-release/sbt_app_kinesis/build.sbt @@ -23,7 +23,7 @@ scalaVersion := System.getenv.get("SCALA_VERSION") libraryDependencies += "org.apache.spark" %% "spark-core" % System.getenv.get("SPARK_VERSION") libraryDependencies += "org.apache.spark" %% "spark-streaming" % System.getenv.get("SPARK_VERSION") -libraryDependencies += "org.apache.spark" %% "kinesis-asl" % System.getenv.get("SPARK_VERSION") +libraryDependencies += "org.apache.spark" %% "spark-streaming-kinesis-asl" % System.getenv.get("SPARK_VERSION") resolvers ++= Seq( "Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"), diff --git a/docs/streaming-kinesis.md b/docs/streaming-kinesis.md index 4c5ad434a243a..3dd6c01c4cd7c 100644 --- a/docs/streaming-kinesis.md +++ b/docs/streaming-kinesis.md @@ -16,6 +16,7 @@ Deployment and runtime notes:
  • Each shard of a stream is processed by one or more KinesisReceivers managed by the Kinesis Client Library (KCL) Worker.
  • Said differently, a single KinesisReceiver can process many shards of a stream.
  • You never need more KinesisReceivers than the number of shards in your stream.
  • + You can horizontally scale the receiving by creating more KinesisReceivers/DStreams (up to the number of shards for a given stream); see the sketch after these notes.
  • The Kinesis assembly jar must also be present on all worker nodes, as they will need access to the Kinesis Client Library.
  • Ensure that /tmp/checkpoint is a valid and accessible directory on all workers (or locally, if running in local mode).
  • This code uses the DefaultAWSCredentialsProviderChain and searches for credentials in the following order of precedence:
      Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY
      Java System Properties - aws.accessKeyId and aws.secretKey
      Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs
      Instance profile credentials - delivered through the Amazon EC2 metadata service
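  (A minimal sketch, not part of the patch, illustrating the horizontal-scaling note above. It assumes an existing StreamingContext named ssc and illustrative placeholders for streamName, endpointUrl, numShards, and kinesisCheckpointInterval; it mirrors the KinesisWordCountASL example introduced later in this patch.)

    import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.kinesis.KinesisUtils

    // streamName, endpointUrl, numShards, kinesisCheckpointInterval are illustrative placeholders.
    // Create one Kinesis DStream/Receiver per shard (never more than the shard count),
    // then union them into a single DStream for downstream processing.
    val kinesisStreams = (0 until numShards).map { i =>
      KinesisUtils.createStream(ssc, streamName, endpointUrl, kinesisCheckpointInterval,
        InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2)
    }
    val unionStream = ssc.union(kinesisStreams)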
    diff --git a/examples/pom.xml b/examples/pom.xml index d87ac68238eb9..ffcec8d56f5c6 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -98,7 +98,7 @@ org.apache.spark - kinesis-asl_${scala.binary.version} + spark-streaming-kinesis-asl_${scala.binary.version} ${project.version} diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java deleted file mode 100644 index f13d3c9acce8b..0000000000000 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCount.java +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.examples.streaming; - -import java.util.List; -import java.util.regex.Pattern; - -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.api.java.function.Function; -import org.apache.spark.api.java.function.Function2; -import org.apache.spark.api.java.function.PairFunction; -import org.apache.spark.storage.StorageLevel; -import org.apache.spark.streaming.Duration; -import org.apache.spark.streaming.Milliseconds; -import org.apache.spark.streaming.api.java.JavaDStream; -import org.apache.spark.streaming.api.java.JavaPairDStream; -import org.apache.spark.streaming.api.java.JavaStreamingContext; -import org.apache.spark.streaming.kinesis.KinesisRecordSerializer; -import org.apache.spark.streaming.kinesis.KinesisStringRecordSerializer; -import org.apache.spark.streaming.kinesis.KinesisUtils; - -import scala.Tuple2; - -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.services.kinesis.AmazonKinesisClient; -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import com.google.common.base.Optional; -import com.google.common.collect.Lists; - -/** - * Java-friendly Kinesis Spark Streaming WordCount example - * - * See http://spark.apache.org/docs/latest/streaming-programming-guide.html for more details - * on the Kinesis Spark Streaming integration. - * - * This example spins up 1 Kinesis Worker (Spark Streaming Receivers) per shard - * of the given stream. - * It then starts pulling from the last checkpointed sequence number of the given - * and . 
- * - * Valid endpoint urls: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region - * - * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials - * in the following order of precedence: - * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY - * Java System Properties - aws.accessKeyId and aws.secretKey - * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs - * Instance profile credentials - delivered through the Amazon EC2 metadata service - * - * Usage: JavaKinesisWordCount - * is the name of the Kinesis stream (ie. mySparkStream) - * is the endpoint of the Kinesis service - * (ie. https://kinesis.us-east-1.amazonaws.com) - * - * Example: - * $ export AWS_ACCESS_KEY_ID= - * $ export AWS_SECRET_KEY= - * $ $SPARK_HOME/bin/run-example \ - * org.apache.spark.examples.streaming.JavaKinesisWordCount mySparkStream \ - * https://kinesis.us-east-1.amazonaws.com - * - * There is a companion helper class called KinesisWordCountProducer which puts dummy data - * onto the Kinesis stream. - * Usage instructions for KinesisWordCountProducer are provided in the class definition. - */ -public final class JavaKinesisWordCount { - private static final Pattern WORD_SEPARATOR = Pattern.compile(" "); - private static final Logger logger = Logger.getLogger(JavaKinesisWordCount.class); - - /** - * Make the constructor private to enforce singleton - */ - private JavaKinesisWordCount() { - } - - public static void main(String[] args) { - /** - * Check that all required args were passed in. - */ - if (args.length < 2) { - System.err.println("Usage: JavaKinesisWordCount "); - System.exit(1); - } - - /** - * (This was lifted from the StreamingExamples.scala in order to avoid the dependency on the spark-examples artifact.) - * Set reasonable logging levels for streaming if the user has not configured log4j. - */ - boolean log4jInitialized = Logger.getRootLogger().getAllAppenders() - .hasMoreElements(); - if (!log4jInitialized) { - /** We first log something to initialize Spark's default logging, then we override the logging level. */ - Logger.getRootLogger() - .info("Setting log level to [ERROR] for streaming example." - + " To override add a custom log4j.properties to the classpath."); - Logger.getRootLogger().setLevel(Level.ERROR); - Logger.getLogger("org.apache.spark.examples.streaming").setLevel(Level.DEBUG); - } - - /** Populate the appropriate variables from the given args */ - String stream = args[0]; - String endpoint = args[1]; - /** Set the batch interval to a fixed 2000 millis (2 seconds) */ - Integer batchIntervalMillis = 2000; - - /** Create a Kinesis client in order to determine the number of shards for the given stream */ - AmazonKinesisClient kinesisClient = new AmazonKinesisClient( - new DefaultAWSCredentialsProviderChain()); - kinesisClient.setEndpoint(endpoint); - - /** Determine the number of shards from the stream */ - int numShards = kinesisClient.describeStream(stream) - .getStreamDescription().getShards().size(); - - /** In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard */ - int numStreams = numShards; - - /** Must add 1 more thread than the number of receivers or the output won't show properly from the driver */ - int numSparkThreads = numStreams + 1; - - /** Set the app name */ - String appName = "KinesisWordCount"; - - /** Setup the Spark config. 
*/ - SparkConf sparkConfig = new SparkConf().setAppName(appName).setMaster( - "local[" + numSparkThreads + "]"); - - /** - * Set the batch interval. - * Records will be pulled from the Kinesis stream and stored as a single DStream within Spark every batch interval. - */ - Duration batchInterval = Milliseconds.apply(batchIntervalMillis); - - /** - * It's recommended that you perform a Spark checkpoint between 5 and 10 times the batch interval. - * While this is the Spark checkpoint interval, we're going to use it for the Kinesis checkpoint interval, as well. - * For example purposes, we'll just use the batchInterval. - */ - Duration checkpointInterval = Milliseconds.apply(batchIntervalMillis); - - /** Setup the StreamingContext */ - JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval); - - /** Setup the checkpoint directory used by Spark Streaming */ - jssc.checkpoint("/tmp/checkpoint"); - - /** Create the same number of Kinesis Receivers/DStreams as stream shards, then union them all */ - JavaDStream allStreams = KinesisUtils - .createStream(jssc, appName, stream, endpoint, checkpointInterval.milliseconds(), - InitialPositionInStream.LATEST); - /** Set the checkpoint interval */ - allStreams.checkpoint(checkpointInterval); - for (int i = 1; i < numStreams; i++) { - /** Create a new Receiver/DStream for each stream shard */ - JavaDStream dStream = KinesisUtils - .createStream(jssc, appName, stream, endpoint, checkpointInterval.milliseconds(), - InitialPositionInStream.LATEST); - /** Set the Spark checkpoint interval */ - dStream.checkpoint(checkpointInterval); - - /** Union with the existing streams */ - allStreams = allStreams.union(dStream); - } - - /** This implementation uses the String-based KinesisRecordSerializer impl */ - final KinesisRecordSerializer recordSerializer = new KinesisStringRecordSerializer(); - - /** - * Split each line of the union'd DStreams into multiple words using flatMap to produce the collection. - * Convert lines of byte[] to multiple Strings by first converting to String, then splitting on WORD_SEPARATOR - * We're caching the result here so that we can use it later without having to re-materialize the underlying RDDs. - */ - JavaDStream words = allStreams.flatMap(new FlatMapFunction() { - /** - * Convert lines of byte[] to multiple words split by WORD_SEPARATOR - * @param byte array - * @return iterable of words split by WORD_SEPARATOR - */ - @Override - public Iterable call(byte[] line) { - return Lists.newArrayList(WORD_SEPARATOR.split(recordSerializer.deserialize(line))); - } - }).cache(); - - /** - * Map each word to a (word, 1) tuple so we can reduce/aggregate later. - * We're caching the result here so that we can use it later without having - * to re-materialize the underlying RDDs. - */ - JavaPairDStream wordCounts = words.mapToPair( - new PairFunction() { - /** - * Create the (word, 1) tuple - * @param word - * @return (word, 1) tuple - */ - @Override - public Tuple2 call(String s) { - return new Tuple2(s, 1); - } - }); - - /** - * Reduce/aggregate by key - * We're caching the result here so that we can use it later without having - * to re-materialize the underlying RDDs. - */ - JavaPairDStream wordCountsByKey = wordCounts.reduceByKey( - new Function2() { - @Override - public Integer call(Integer i1, Integer i2) { - return i1 + i2; - } - }).cache(); - - /** Update the running totals of words. 
*/ - Function2, Optional, Optional> updateTotals = - /** - * @param sequence of new counts - * @param current running total (could be None if no current count exists) - * @return updated count - */ - new Function2, Optional, Optional>() { - @Override public Optional call(List newCounts, Optional currentCount) { - Integer currentSum = 0; - if (currentCount.isPresent()) { - currentSum = currentCount.get(); - } - Integer newSum = currentSum; - - for (Integer newCount : newCounts) { - newSum += newCount; - } - return Optional.of(newSum); - } - }; - - /** - * Calculate the running totals using the updateTotals method. - */ - JavaPairDStream wordTotalsByKey = wordCountsByKey.updateStateByKey(updateTotals); - - /** - * Sort and print the running word totals. - * This is an Output Operation and will materialize the DStream. - */ - sortAndPrint("Word Count Totals By Key", wordTotalsByKey); - - /** Start the streaming context and await termination */ - jssc.start(); - jssc.awaitTermination(); - } - - /** - * Sort and print the given dstream. - * This is an Output Operation that will materialize the underlying DStream. - * Everything up to this point is a lazy Transformation Operation. - * - * @param description of the dstream for logging purposes - * @param dstream to sort and print - */ - private static void sortAndPrint(final String description, JavaPairDStream dstream) { - dstream.foreachRDD( - new Function, Void>() { - public Void call(JavaPairRDD batch) { - JavaPairRDD sortedBatch = batch.sortByKey(true); - logger.info(description); - for (Object wordCount: sortedBatch.collect()) { - logger.info(wordCount); - } - - return null; - } - }); - } -} diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java new file mode 100644 index 0000000000000..31793aaa020ba --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.examples.streaming; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +import org.apache.log4j.Logger; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.storage.StorageLevel; +import org.apache.spark.streaming.Duration; +import org.apache.spark.streaming.api.java.JavaDStream; +import org.apache.spark.streaming.api.java.JavaPairDStream; +import org.apache.spark.streaming.api.java.JavaStreamingContext; +import org.apache.spark.streaming.kinesis.KinesisUtils; + +import scala.Tuple2; + +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; +import com.amazonaws.services.kinesis.AmazonKinesisClient; +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; +import com.google.common.collect.Lists; + +/** + * Java-friendly Kinesis Spark Streaming WordCount example + * + * See http://spark.apache.org/docs/latest/streaming-kinesis.html for more details + * on the Kinesis Spark Streaming integration. + * + * This example spins up 1 Kinesis Worker (Spark Streaming Receiver) per shard + * for the given stream. + * It then starts pulling from the last checkpointed sequence number of the given + * and . + * + * Valid endpoint urls: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region + * + * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials + * in the following order of precedence: + * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY + * Java System Properties - aws.accessKeyId and aws.secretKey + * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs + * Instance profile credentials - delivered through the Amazon EC2 metadata service + * + * Usage: JavaKinesisWordCountASL + * is the name of the Kinesis stream (ie. mySparkStream) + * is the endpoint of the Kinesis service + * (ie. https://kinesis.us-east-1.amazonaws.com) + * + * Example: + * $ export AWS_ACCESS_KEY_ID= + * $ export AWS_SECRET_KEY= + * $ $SPARK_HOME/bin/run-example \ + * org.apache.spark.examples.streaming.JavaKinesisWordCountASL mySparkStream \ + * https://kinesis.us-east-1.amazonaws.com + * + * There is a companion helper class called KinesisWordCountProducerASL which puts dummy data + * onto the Kinesis stream. + * Usage instructions for KinesisWordCountProducerASL are provided in the class definition. + */ +public final class JavaKinesisWordCountASL { + private static final Pattern WORD_SEPARATOR = Pattern.compile(" "); + private static final Logger logger = Logger.getLogger(JavaKinesisWordCountASL.class); + + /** + * Make the constructor private to enforce singleton + */ + private JavaKinesisWordCountASL() { + } + + public static void main(String[] args) { + /** + * Check that all required args were passed in. + */ + if (args.length < 2) { + System.err.println( + "|Usage: KinesisWordCount \n" + + "| is the name of the Kinesis stream\n" + + "| is the endpoint of the Kinesis service\n" + + "| (e.g. 
https://kinesis.us-east-1.amazonaws.com)\n"); + System.exit(1); + } + + StreamingExamples.setStreamingLogLevels(); + + /** Populate the appropriate variables from the given args */ + String streamName = args[0]; + String endpointUrl = args[1]; + /** Set the batch interval to a fixed 2000 millis (2 seconds) */ + Duration batchInterval = new Duration(2000); + + /** Create a Kinesis client in order to determine the number of shards for the given stream */ + AmazonKinesisClient kinesisClient = new AmazonKinesisClient( + new DefaultAWSCredentialsProviderChain()); + kinesisClient.setEndpoint(endpointUrl); + + /** Determine the number of shards from the stream */ + int numShards = kinesisClient.describeStream(streamName) + .getStreamDescription().getShards().size(); + + /** In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard */ + int numStreams = numShards; + + /** Must add 1 more thread than the number of receivers or the output won't show properly from the driver */ + int numSparkThreads = numStreams + 1; + + /** Setup the Spark config. */ + SparkConf sparkConfig = new SparkConf().setAppName("KinesisWordCount").setMaster( + "local[" + numSparkThreads + "]"); + + /** Kinesis checkpoint interval. Same as batchInterval for this example. */ + Duration checkpointInterval = batchInterval; + + /** Setup the StreamingContext */ + JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval); + + /** Setup the checkpoint directory used by Spark Streaming */ + jssc.checkpoint("/tmp/checkpoint"); + + /** Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */ + List> streamsList = new ArrayList>(numStreams); + for (int i = 0; i < streamsList.size(); i++) { + streamsList.add( + KinesisUtils.createStream(jssc, streamName, endpointUrl, checkpointInterval, + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2()) + ); + } + + /** Union all the streams if there is more than 1 stream */ + JavaDStream unionStreams; + if (streamsList.size() > 1) { + unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size())); + } else { + /** Otherwise, just use the 1 stream */ + unionStreams = streamsList.get(0); + } + + /** + * Split each line of the union'd DStreams into multiple words using flatMap to produce the collection. + * Convert lines of byte[] to multiple Strings by first converting to String, then splitting on WORD_SEPARATOR. + */ + JavaDStream words = unionStreams.flatMap(new FlatMapFunction() { + @Override + public Iterable call(byte[] line) { + return Lists.newArrayList(WORD_SEPARATOR.split(new String(line))); + } + }); + + /** Map each word to a (word, 1) tuple, then reduce/aggregate by key. 
*/ + JavaPairDStream wordCounts = words.mapToPair( + new PairFunction() { + @Override + public Tuple2 call(String s) { + return new Tuple2(s, 1); + } + }).reduceByKey(new Function2() { + @Override + public Integer call(Integer i1, Integer i2) { + return i1 + i2; + } + }); + + /** Print the first 10 wordCounts by key */ + wordCounts.print(); + + /** Start the streaming context and await termination */ + jssc.start(); + jssc.awaitTermination(); + } +} diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala deleted file mode 100644 index 50c3889d277fa..0000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCount.scala +++ /dev/null @@ -1,369 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.streaming - -import java.nio.ByteBuffer - -import scala.util.Random - -import org.apache.log4j.Level -import org.apache.log4j.Logger -import org.apache.spark.Logging -import org.apache.spark.SparkConf -import org.apache.spark.SparkContext.rddToOrderedRDDFunctions -import org.apache.spark.annotation.Experimental -import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.Milliseconds -import org.apache.spark.streaming.StreamingContext -import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions -import org.apache.spark.streaming.dstream.DStream -import org.apache.spark.streaming.kinesis.KinesisStringRecordSerializer -import org.apache.spark.streaming.kinesis.KinesisUtils - -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain -import com.amazonaws.services.kinesis.AmazonKinesisClient -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream -import com.amazonaws.services.kinesis.model.PutRecordRequest - -/** - * Kinesis Spark Streaming WordCount example. - * - * See http://spark.apache.org/docs/latest/streaming-programming-guide.html for more details on - * the Kinesis Spark Streaming integration. - * - * This example spins up 1 Kinesis Worker (Spark Streaming Receivers) per shard of the - * given stream. - * It then starts pulling from the last checkpointed sequence number of the given - * and . 
- * - * Valid endpoint urls: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region - * - * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials - * in the following order of precedence: - * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY - * Java System Properties - aws.accessKeyId and aws.secretKey - * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs - * Instance profile credentials - delivered through the Amazon EC2 metadata service - * - * Usage: KinesisWordCount - * is the name of the Kinesis stream (ie. mySparkStream) - * is the endpoint of the Kinesis service - * (ie. https://kinesis.us-east-1.amazonaws.com) - * - * Example: - * $ export AWS_ACCESS_KEY_ID= - * $ export AWS_SECRET_KEY= - * $ $SPARK_HOME/bin/run-example \ - * org.apache.spark.examples.streaming.KinesisWordCount mySparkStream \ - * https://kinesis.us-east-1.amazonaws.com - * - * There is a companion helper class below called KinesisWordCountProducer which puts - * dummy data onto the Kinesis stream. - * Usage instructions for KinesisWordCountProducer are provided in that class definition. - */ -object KinesisWordCount extends Logging { - val WordSeparator = " " - - def main(args: Array[String]) { -/** - * Check that all required args were passed in. - */ - if (args.length < 2) { - System.err.println("Usage: KinesisWordCount ") - System.exit(1) - } - - /** - * (This was lifted from the StreamingExamples.scala in order to avoid the dependency - * on the spark-examples artifact.) - * Set reasonable logging levels for streaming if the user has not configured log4j. - */ - val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements() - if (!log4jInitialized) { - /** - * We first log something to initialize Spark's default logging, - * then we override the logging level. - * */ - logInfo("Setting log level to [INFO] for streaming example." + - " To override add a custom log4j.properties to the classpath.") - - Logger.getRootLogger().setLevel(Level.INFO) - Logger.getLogger("org.apache.spark.examples.streaming").setLevel(Level.DEBUG); - } - - /** Populate the appropriate variables from the given args */ - val Array(stream, endpoint) = args - val batchIntervalMillis = 2000 - - /** Create a Kinesis client in order to determine the number of shards for the given stream */ - val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) - kinesisClient.setEndpoint(endpoint) - - /** Determine the number of shards from the stream */ - val numShards = kinesisClient.describeStream(stream).getStreamDescription().getShards().size() - - /** In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard.*/ - val numStreams = numShards - - /** - * Must add 1 more thread than the number of receivers or the output won't show properly - * from the driver - */ - val numSparkThreads = numStreams + 1 - - /** Set the app name */ - val appName = "KinesisWordCount" - - /** Setup the Spark config. */ - val sparkConfig = new SparkConf().setAppName(appName).setMaster(s"local[$numSparkThreads]") - - /** - * Set the batch interval. - * Records will be pulled from the Kinesis stream and stored as a single DStream within Spark - * every batch interval. - */ - val batchInterval = Milliseconds(batchIntervalMillis) - - /** - * It's recommended that you perform a Spark checkpoint between 5 and 10 times the batch - * interval. 
- * While this is the Spark checkpoint interval, we're going to use it for the Kinesis - * checkpoint interval, as well. - * For example purposes, we'll just use the batchInterval. - */ - val checkpointInterval = batchInterval - - /** Setup the StreamingContext */ - val ssc = new StreamingContext(sparkConfig, batchInterval) - - /** Setup the checkpoint directory used by Spark Streaming */ - ssc.checkpoint("/tmp/checkpoint"); - - /** - * Create the same number of Kinesis Receivers/DStreams as stream shards, then union - * them all. - */ - var allStreams: DStream[Array[Byte]] = KinesisUtils.createStream(ssc, appName, stream, - endpoint, checkpointInterval.milliseconds, InitialPositionInStream.LATEST) - /** Set the checkpoint interval */ - allStreams.checkpoint(checkpointInterval) - for (i <- 1 until numStreams) { - /** Create a new Receiver/DStream for each stream shard */ - val dStream = KinesisUtils.createStream(ssc, appName, stream, endpoint, - checkpointInterval.milliseconds, InitialPositionInStream.LATEST) - /** Set the Spark checkpoint interval */ - dStream.checkpoint(checkpointInterval) - - /** Union with the existing streams */ - allStreams = allStreams.union(dStream) - } - - /** This implementation uses the String-based KinesisRecordSerializer impl */ - val recordSerializer = new KinesisStringRecordSerializer() - - /** - * Sort and print the given dstream. - * This is an Output Operation that will materialize the underlying DStream. - * Everything up to this point is a lazy Transformation Operation. - * - * @param description of the dstream for logging purposes - * @param dstream to sort and print - */ - def sortAndPrint(description: String, dstream: DStream[(String,Int)]) = { - dstream.foreachRDD((batch, endOfWindowTime) => { - val sortedBatch = batch.sortByKey(true) - logInfo(s"$description @ $endOfWindowTime") - sortedBatch.collect().foreach( - wordCount => logInfo(s"$wordCount")) - } - ) - } - - /** - * Split each line of the union'd DStreams into multiple words using flatMap - * to produce the collection. - * Convert lines of Array[Byte] to multiple Strings by first converting to String, - * then splitting on WORD_SEPARATOR - * We're caching the result here so that we can use it later without having - * to re-materialize the underlying RDDs. - */ - val words = allStreams.flatMap(line => recordSerializer.deserialize(line) - .split(WordSeparator)).cache() - - /** - * Map each word to a (word, 1) tuple so we can reduce/aggregate later. - * We're caching the result here so that we can use it later without having - * to re-materialize the underlying RDDs. - */ - val wordCounts = words.map(word => (word, 1)) - - /** - * Reduce/aggregate by key. - * We're caching the result here so that we can use it later without having - * to re-materialize the underlying RDDs. - */ - val wordCountsByKey = wordCounts.reduceByKey((left, right) => left + right) - - /** - * Update the running totals of words. - * - * @param sequence of new counts - * @param current running total (could be None if no current count exists) - */ - def updateTotals = (newCounts: Seq[Int], currentCounts: Option[Int]) => { - val newCount = newCounts.foldLeft(0)((left, right) => left + right) - val currentCount = currentCounts.getOrElse(0) - Some(newCount + currentCount) - } - - /** - * Calculate the running totals using the updateTotals method. - */ - val wordTotalsByKey = wordCountsByKey.updateStateByKey[Int](updateTotals) - - /** - * Sort and print the running word totals. 
- * This is an Output Operation and will materialize the DStream. - */ - sortAndPrint("Word Count Totals By Key", wordTotalsByKey) - - /** Start the streaming context and await termination */ - ssc.start() - ssc.awaitTermination() - } -} - -/** - * Usage: KinesisWordCountProducer - * - * is the name of the Kinesis stream (ie. mySparkStream) - * is the endpoint of the Kinesis service - * (ie. https://kinesis.us-east-1.amazonaws.com) - * is the rate of records per second to put onto the stream - * is the rate of records per second to put onto the stream - * - * Example: - * $ export AWS_ACCESS_KEY_ID= - * $ export AWS_SECRET_KEY= - * $ $SPARK_HOME/bin/run-example \ - * org.apache.spark.examples.streaming.KinesisWordCountProducer mySparkStream \ - * https://kinesis.us-east-1.amazonaws.com 10 5 - */ -private[streaming] -object KinesisWordCountProducer extends Logging { - val MaxRandomInts = 10 - - def main(args: Array[String]) { - if (args.length < 4) { - System.err.println("Usage: KinesisWordCountProducer " + - " ") - System.exit(1) - } - - /** - * (This was lifted from the StreamingExamples.scala in order to avoid the dependency - * on the spark-examples artifact.) - * Set reasonable logging levels for streaming if the user has not configured log4j. - */ - val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements - if (!log4jInitialized) { - /** - * We first log something to initialize Spark's default logging, then we override - * the logging level. - */ - logInfo("Setting log level to [INFO] for streaming example." + - " To override add a custom log4j.properties to the classpath.") - - Logger.getRootLogger().setLevel(Level.INFO) - Logger.getLogger("org.apache.spark.examples.streaming").setLevel(Level.DEBUG); - } - - /** Populate the appropriate variables from the given args */ - val Array(stream, endpoint, recordsPerSecond, wordsPerRecord) = args - - /** Generate the records and return the totals */ - val totals: Seq[(Int, Int)] = generate(stream, endpoint, recordsPerSecond.toInt, - wordsPerRecord.toInt) - - logInfo("Totals") - /** Print the array of (index, total) tuples */ - totals.foreach(total => logInfo(total.toString())) - } - - def generate(stream: String, - endpoint: String, - recordsPerSecond: Int, - wordsPerRecord: Int): Seq[(Int, Int)] = { - val WORD_SEPARATOR = " " - - /** Create the Kinesis client */ - val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) - kinesisClient.setEndpoint(endpoint) - - logInfo(s"Putting records onto stream $stream and endpoint $endpoint at a rate of" + - s" $recordsPerSecond records per second and $wordsPerRecord words per record"); - - /** Create the String-based record serializer */ - val recordSerializer = new KinesisStringRecordSerializer() - - val totals = new Array[Int](MaxRandomInts) - /** Put String records onto the stream per the given recordPerSec and wordsPerRecord */ - for (i <- 1 to 5) { - /** Generate recordsPerSec records to put onto the stream */ - val records = (1 to recordsPerSecond.toInt).map { recordNum => - /** - * Randomly generate each wordsPerRec words between 0 (inclusive) - * and MAX_RANDOM_INTS (exclusive) - */ - val data = (1 to wordsPerRecord.toInt).map(x => { - /** Generate the random int */ - val randomInt = Random.nextInt(MaxRandomInts) - - /** Keep track of the totals */ - totals(randomInt) += 1 - - /** Convert the Int to a String */ - randomInt.toString() - }) - /** Create a String of randomInts separated by WORD_SEPARATOR */ - .mkString(WORD_SEPARATOR) - - /** Create 
a partitionKey based on recordNum */ - val partitionKey = s"partitionKey-$recordNum" - - /** Create a PutRecordRequest with an Array[Byte] version of the data */ - val putRecordRequest = new PutRecordRequest().withStreamName(stream) - .withPartitionKey(partitionKey) - .withData(ByteBuffer.wrap(recordSerializer.serialize(data))); - - /** Put the record onto the stream and capture the PutRecordResult */ - val putRecordResult = kinesisClient.putRecord(putRecordRequest); - - logInfo(s"Successfully put record with partitionKey $partitionKey and shardId" + - s" ${putRecordResult.getShardId()} and data $data and endpoint $endpoint and stream" + - s" $stream") - } - - /** Sleep for a second */ - Thread.sleep(1000) - } - - /** Convert the totals to (index, total) tuple */ - (0 to (MaxRandomInts - 1)).zip(totals) - } -} diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala new file mode 100644 index 0000000000000..865eea433aeb9 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.streaming + +import java.nio.ByteBuffer + +import scala.util.Random + +import org.apache.spark.Logging +import org.apache.spark.SparkConf +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.Milliseconds +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions +import org.apache.spark.streaming.kinesis.KinesisUtils + +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain +import com.amazonaws.services.kinesis.AmazonKinesisClient +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream +import com.amazonaws.services.kinesis.model.PutRecordRequest + +/** + * Kinesis Spark Streaming WordCount example. + * + * See http://spark.apache.org/docs/latest/streaming-kinesis.html for more details on + * the Kinesis Spark Streaming integration. + * + * This example spins up 1 Kinesis Worker (Spark Streaming Receiver) per shard + * for the given stream. + * It then starts pulling from the last checkpointed sequence number of the given + * and . 
+ * + * Valid endpoint urls: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region + * + * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials + * in the following order of precedence: + * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY + * Java System Properties - aws.accessKeyId and aws.secretKey + * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs + * Instance profile credentials - delivered through the Amazon EC2 metadata service + * + * Usage: KinesisWordCountASL + * is the name of the Kinesis stream (ie. mySparkStream) + * is the endpoint of the Kinesis service + * (ie. https://kinesis.us-east-1.amazonaws.com) + * + * Example: + * $ export AWS_ACCESS_KEY_ID= + * $ export AWS_SECRET_KEY= + * $ $SPARK_HOME/bin/run-example \ + * org.apache.spark.examples.streaming.KinesisWordCountASL mySparkStream \ + * https://kinesis.us-east-1.amazonaws.com + * + * There is a companion helper class below called KinesisWordCountProducerASL which puts + * dummy data onto the Kinesis stream. + * Usage instructions for KinesisWordCountProducerASL are provided in that class definition. + */ +object KinesisWordCountASL extends Logging { + def main(args: Array[String]) { +/** + * Check that all required args were passed in. + */ + if (args.length < 2) { + System.err.println( + """ + |Usage: KinesisWordCount + | is the name of the Kinesis stream + | is the endpoint of the Kinesis service + | (e.g. https://kinesis.us-east-1.amazonaws.com) + """.stripMargin) + System.exit(1) + } + + StreamingExamples.setStreamingLogLevels() + + /** Populate the appropriate variables from the given args */ + val Array(streamName, endpointUrl) = args + + /** Determine the number of shards from the stream */ + val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) + kinesisClient.setEndpoint(endpointUrl) + val numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards().size() + + /** In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard. */ + val numStreams = numShards + + /** + * numSparkThreads should be 1 more thread than the number of receivers. + * This leaves one thread available for actually processing the data. + */ + val numSparkThreads = numStreams + 1 + + /** Setup the and SparkConfig and StreamingContext */ + /** Spark Streaming batch interval */ + val batchInterval = Milliseconds(2000) + val sparkConfig = new SparkConf().setAppName("KinesisWordCount").setMaster(s"local[$numSparkThreads]") + val ssc = new StreamingContext(sparkConfig, batchInterval) + /** Setup the checkpoint directory used by Spark Streaming */ + ssc.checkpoint("/tmp/checkpoint"); + + /** Kinesis checkpoint interval. Same as batchInterval for this example. */ + val kinesisCheckpointInterval = batchInterval + + /** Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */ + val kinesisStreams = (0 until numStreams).map { i => + KinesisUtils.createStream(ssc, streamName, endpointUrl, kinesisCheckpointInterval, + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2) + } + + /** Union all the streams */ + val unionStreams = ssc.union(kinesisStreams) + + /** Convert each line of Array[Byte] to String, split into words, and count them */ + val words = unionStreams.flatMap(byteArray => new String(byteArray) + .split(" ")) + + /** Map each word to a (word, 1) tuple so we can reduce/aggregate by key. 
*/ + val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _) + + /** Print the first 10 wordCounts by key */ + wordCounts.print() + + /** Start the streaming context and await termination */ + ssc.start() + ssc.awaitTermination() + } +} + +/** + * Usage: KinesisWordCountProducerASL + * + * is the name of the Kinesis stream (ie. mySparkStream) + * is the endpoint of the Kinesis service + * (ie. https://kinesis.us-east-1.amazonaws.com) + * is the rate of records per second to put onto the stream + * is the rate of records per second to put onto the stream + * + * Example: + * $ export AWS_ACCESS_KEY_ID= + * $ export AWS_SECRET_KEY= + * $ $SPARK_HOME/bin/run-example \ + * org.apache.spark.examples.streaming.KinesisWordCountProducerASL mySparkStream \ + * https://kinesis.us-east-1.amazonaws.com 10 5 + */ +object KinesisWordCountProducerASL { + def main(args: Array[String]) { + if (args.length < 4) { + System.err.println("Usage: KinesisWordCountProducerASL " + + " ") + System.exit(1) + } + + StreamingExamples.setStreamingLogLevels() + + /** Populate the appropriate variables from the given args */ + val Array(stream, endpoint, recordsPerSecond, wordsPerRecord) = args + + /** Generate the records and return the totals */ + val totals = generate(stream, endpoint, recordsPerSecond.toInt, wordsPerRecord.toInt) + + /** Print the array of (index, total) tuples */ + println("Totals") + totals.foreach(total => println(total.toString())) + } + + def generate(stream: String, + endpoint: String, + recordsPerSecond: Int, + wordsPerRecord: Int): Seq[(Int, Int)] = { + + val MaxRandomInts = 10 + + /** Create the Kinesis client */ + val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) + kinesisClient.setEndpoint(endpoint) + + println(s"Putting records onto stream $stream and endpoint $endpoint at a rate of" + + s" $recordsPerSecond records per second and $wordsPerRecord words per record"); + + val totals = new Array[Int](MaxRandomInts) + /** Put String records onto the stream per the given recordPerSec and wordsPerRecord */ + for (i <- 1 to 5) { + + /** Generate recordsPerSec records to put onto the stream */ + val records = (1 to recordsPerSecond.toInt).map { recordNum => + /** + * Randomly generate each wordsPerRec words between 0 (inclusive) + * and MAX_RANDOM_INTS (exclusive) + */ + val data = (1 to wordsPerRecord.toInt).map(x => { + /** Generate the random int */ + val randomInt = Random.nextInt(MaxRandomInts) + + /** Keep track of the totals */ + totals(randomInt) += 1 + + randomInt.toString() + }).mkString(" ") + + /** Create a partitionKey based on recordNum */ + val partitionKey = s"partitionKey-$recordNum" + + /** Create a PutRecordRequest with an Array[Byte] version of the data */ + val putRecordRequest = new PutRecordRequest().withStreamName(stream) + .withPartitionKey(partitionKey) + .withData(ByteBuffer.wrap(data.getBytes())); + + /** Put the record onto the stream and capture the PutRecordResult */ + val putRecordResult = kinesisClient.putRecord(putRecordRequest); + } + + /** Sleep for a second */ + Thread.sleep(1000) + println("Sent " + recordsPerSecond + " records") + } + + /** Convert the totals to (index, total) tuple */ + (0 to (MaxRandomInts - 1)).zip(totals) + } +} diff --git a/extras/kinesis-asl/pom.xml b/extras/kinesis-asl/pom.xml index 0afb076d1f0eb..739a010200dee 100644 --- a/extras/kinesis-asl/pom.xml +++ b/extras/kinesis-asl/pom.xml @@ -27,10 +27,9 @@ org.apache.spark - kinesis-asl_2.10 + spark-streaming-kinesis-asl_2.10 jar Spark Kinesis 
Integration diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/CheckpointState.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala similarity index 82% rename from extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/CheckpointState.scala rename to extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala index febde542723b2..a541a72614cbf 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/CheckpointState.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala @@ -17,6 +17,7 @@ package org.apache.spark.streaming.kinesis import org.apache.spark.Logging +import org.apache.spark.streaming.Duration import org.apache.spark.streaming.util.Clock import org.apache.spark.streaming.util.ManualClock import org.apache.spark.streaming.util.SystemClock @@ -24,18 +25,18 @@ import org.apache.spark.streaming.util.SystemClock /** * This is a helper class for managing checkpoint clocks. * - * @param checkpoint interval in millis - * @param current clock. if none specified, will default to current SystemClock + * @param checkpointInterval + * @param currentClock. Default to current SystemClock if none is passed in (mocking purposes) */ -private[kinesis] class CheckpointState( - checkpointIntervalMillis: Long, +private[kinesis] class KinesisCheckpointState( + checkpointInterval: Duration, currentClock: Clock = new SystemClock()) extends Logging { /** - * Initialize the checkpoint clock using the given currentClock + checkpointIntervalMillis + * Initialize the checkpoint clock using the given currentClock + checkpointInterval millis */ val checkpointClock = new ManualClock() - checkpointClock.setTime(currentClock.currentTime() + checkpointIntervalMillis) + checkpointClock.setTime(currentClock.currentTime() + checkpointInterval.milliseconds) /** * Check if it's time to checkpoint based on the current time and the derived time @@ -51,6 +52,6 @@ private[kinesis] class CheckpointState( * Advance the checkpoint clock by the checkpoint interval. */ def advanceCheckpoint() = { - checkpointClock.addToTime(checkpointIntervalMillis) + checkpointClock.addToTime(checkpointInterval.milliseconds) } } diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala index d6e4b7996877c..7d3897d45c77f 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala @@ -21,6 +21,7 @@ import java.util.UUID import org.apache.spark.Logging import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.Duration import org.apache.spark.streaming.receiver.Receiver import com.amazonaws.auth.AWSCredentialsProvider @@ -41,23 +42,33 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker * Instances of this class will get shipped to the Spark Streaming Workers * to run within a Spark Executor. * - * @param appName unique name for your Kinesis app. Multiple instances of the app pull from - * the same stream. The Kinesis Client Library coordinates all load-balancing and - * failure-recovery. - * @param stream Kinesis stream name - * @param endpoint url of Kinesis service (ie. 
https://kinesis.us-east-1.amazonaws.com) - * Available endpoints: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region - * @param checkpointIntervalMillis interval (millis) for Kinesis checkpointing - * @param initialPositionInStream in the absence of a Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. + * @param appName Kinesis application name. Kinesis Apps are mapped to Kinesis Streams + * by the Kinesis Client Library. If you change the App name or Stream name, + * the KCL will throw errors. This usually requires deleting the backing + * DynamoDB table with the same name this Kinesis application. + * @param streamName Kinesis stream name + * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param storageLevel Storage level to use for storing the received objects + * + * @return ReceiverInputDStream[Array[Byte]] */ private[kinesis] class KinesisReceiver( appName: String, - stream: String, - endpoint: String, - checkpointIntervalMillis: Long, - initialPositionInStream: InitialPositionInStream) - extends Receiver[Array[Byte]](StorageLevel.MEMORY_AND_DISK_2) with Logging { receiver => + streamName: String, + endpointUrl: String, + checkpointInterval: Duration, + initialPositionInStream: InitialPositionInStream, + storageLevel: StorageLevel) + extends Receiver[Array[Byte]](storageLevel) with Logging { receiver => /** * The following vars are built in the onStart() method which executes in the Spark Worker after @@ -109,12 +120,12 @@ private[kinesis] class KinesisReceiver( override def onStart() { workerId = InetAddress.getLocalHost.getHostAddress() + ":" + UUID.randomUUID() credentialsProvider = new DefaultAWSCredentialsProviderChain() - kinesisClientLibConfiguration = new KinesisClientLibConfiguration(appName, stream, - credentialsProvider, workerId).withKinesisEndpoint(endpoint) + kinesisClientLibConfiguration = new KinesisClientLibConfiguration(appName, streamName, + credentialsProvider, workerId).withKinesisEndpoint(endpointUrl) .withInitialPositionInStream(initialPositionInStream).withTaskBackoffTimeMillis(500) recordProcessorFactory = new IRecordProcessorFactory { override def createProcessor: IRecordProcessor = new KinesisRecordProcessor(receiver, - workerId, new CheckpointState(checkpointIntervalMillis)) + workerId, new KinesisCheckpointState(checkpointInterval)) } worker = new Worker(recordProcessorFactory, kinesisClientLibConfiguration) worker.run() diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala index 055e7297706ae..5d201819a8f87 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -19,10 +19,14 @@ package org.apache.spark.streaming.kinesis 
import java.util.List import scala.collection.JavaConversions.asScalaBuffer -import scala.collection.mutable.ArrayBuffer +import scala.util.Random import org.apache.spark.Logging +import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibDependencyException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason @@ -33,15 +37,15 @@ import com.amazonaws.services.kinesis.model.Record * This implementation operates on the Array[Byte] from the KinesisReceiver. * The Kinesis Worker creates an instance of this KinesisRecordProcessor upon startup. * - * @param Kinesis receiver + * @param receiver Kinesis receiver * @param workerId for logging purposes - * @param checkpointState represents the checkpoint state including the next time a - * checkpoint is needed. it's injected here for mocking purposes. + * @param checkpointState represents the checkpoint state including the next checkpoint time. + * It's injected here for mocking purposes. */ private[kinesis] class KinesisRecordProcessor( receiver: KinesisReceiver, workerId: String, - checkpointState: CheckpointState) extends IRecordProcessor with Logging { + checkpointState: KinesisCheckpointState) extends IRecordProcessor with Logging { /** shardId to be populated during initialize() */ var shardId: String = _ @@ -61,7 +65,7 @@ private[kinesis] class KinesisRecordProcessor( * This is the record-processing bridge between the KCL's IRecordProcessor.processRecords() * and Spark Streaming's Receiver.store(). * - * @param list of records from the Kinesis stream shard + * @param batch list of records from the Kinesis stream shard * @param checkpointer used to update Kinesis when this batch has been processed/stored * in the DStream */ @@ -69,18 +73,16 @@ private[kinesis] class KinesisRecordProcessor( if (!receiver.isStopped()) { try { /** - * Convert the list of records to a list of Array[Byte] * Note: If we try to store the raw ByteBuffer from record.getData(), the Spark Streaming * Receiver.store(ByteBuffer) attempts to deserialize the ByteBuffer using the * internally-configured Spark serializer (kryo, etc). - * This is not desirable, so we instead store a raw Array[Byte] and decouple - * ourselves from the internal serialization strategy. + * This is not desirable, so we instead store a raw Array[Byte] and decouple + * ourselves from Spark's internal serialization strategy. 
*/ - val batchByteArrays = new ArrayBuffer[Array[Byte]](batch.size()) - batchByteArrays ++= batch.map(record => record.getData().array()) - - /** Store the list of Array[Byte] in Spark */ - KinesisRecordProcessorUtils.retry(receiver.store(batchByteArrays), 4, 500) + batch.foreach(record => + KinesisRecordProcessor.retry(receiver.store(record.getData().array()), 4, 500) + ) + logDebug(s"Stored: Worker $workerId stored ${batch.size} records for shardId $shardId") /** @@ -96,7 +98,7 @@ private[kinesis] class KinesisRecordProcessor( */ if (checkpointState.shouldCheckpoint()) { /** Perform the checkpoint */ - KinesisRecordProcessorUtils.retry(checkpointer.checkpoint(), 4, 500) + KinesisRecordProcessor.retry(checkpointer.checkpoint(), 4, 500) /** Update the next checkpoint time */ checkpointState.advanceCheckpoint() @@ -134,8 +136,8 @@ private[kinesis] class KinesisRecordProcessor( * 2) the failed or latent Worker has stopped sending heartbeats for whatever reason * (ShutdownReason.ZOMBIE) * - * @param checkpointer used to performn a Kinesis checkpoint for ShutdownReason.TERMINATE - * @param shutdown reason (ShutdownReason.TERMINATE or ShutdownReason.ZOMBIE) + * @param checkpointer used to perform a Kinesis checkpoint for ShutdownReason.TERMINATE + * @param reason for shutdown (ShutdownReason.TERMINATE or ShutdownReason.ZOMBIE) */ override def shutdown(checkpointer: IRecordProcessorCheckpointer, reason: ShutdownReason) { logInfo(s"Shutdown: Shutting down workerId $workerId with reason $reason") @@ -145,7 +147,7 @@ private[kinesis] class KinesisRecordProcessor( * Checkpoint to indicate that all records from the shard have been drained and processed. * It's now OK to read from the new shards that resulted from a resharding event. */ - case ShutdownReason.TERMINATE => KinesisRecordProcessorUtils.retry(checkpointer.checkpoint(), + case ShutdownReason.TERMINATE => KinesisRecordProcessor.retry(checkpointer.checkpoint(), 4, 500) /** @@ -161,3 +163,52 @@ private[kinesis] class KinesisRecordProcessor( } } } + +private[kinesis] object KinesisRecordProcessor extends Logging { + /** + * Retry the given amount of times with a random backoff time (millis) less than the + * given maxBackOffMillis + * + * @param expression expression to evalute + * @param numRetriesLeft number of retries left + * @param maxBackOffMillis: max millis between retries + * + * @return evaluation of the given expression + * @throws Unretryable exception, unexpected exception, + * or any exception that persists after numRetriesLeft reaches 0 + */ + @annotation.tailrec + def retry[T](expression: => T, numRetriesLeft: Int, maxBackOffMillis: Int): T = { + util.Try { expression } match { + /** If the function succeeded, evaluate to x. 
*/ + case util.Success(x) => x + /** If the function failed, either retry or throw the exception */ + case util.Failure(e) => e match { + /** Retry: Throttling or other Retryable exception has occurred */ + case _: ThrottlingException | _: KinesisClientLibDependencyException if numRetriesLeft > 1 + => { + val backOffMillis = Random.nextInt(maxBackOffMillis) + Thread.sleep(backOffMillis) + logError(s"Retryable Exception: Random backOffMillis=${backOffMillis}", e) + retry(expression, numRetriesLeft - 1, maxBackOffMillis) + } + /** Throw: Shutdown has been requested by the Kinesis Client Library.*/ + case _: ShutdownException => { + logError(s"ShutdownException: Caught shutdown exception, skipping checkpoint.", e) + throw e + } + /** Throw: Non-retryable exception has occurred with the Kinesis Client Library */ + case _: InvalidStateException => { + logError(s"InvalidStateException: Cannot save checkpoint to the DynamoDB table used" + + s" by the Amazon Kinesis Client Library. Table likely doesn't exist.", e) + throw e + } + /** Throw: Unexpected exception has occurred */ + case _ => { + logError(s"Unexpected, non-retryable exception.", e) + throw e + } + } + } + } +} diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessorUtils.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessorUtils.scala deleted file mode 100644 index 63d839f3a3bb3..0000000000000 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessorUtils.scala +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.streaming.kinesis - -import scala.util.Random - -import org.apache.spark.Logging - -import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException -import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibDependencyException -import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException -import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException - - -/** - * Helper for the KinesisRecordProcessor. 
- */ -private[kinesis] object KinesisRecordProcessorUtils extends Logging { - /** - * Retry the given amount of times with a random backoff time (millis) less than the - * given maxBackOffMillis - * - * @param expression expression to evalute - * @param numRetriesLeft number of retries left - * @param maxBackOffMillis: max millis between retries - * - * @return Evaluation of the given expression - * @throws Unretryable exception, unexpected exception, - * or any exception that persists after numRetriesLeft reaches 0 - */ - @annotation.tailrec - def retry[T](expression: => T, numRetriesLeft: Int, maxBackOffMillis: Int): T = { - util.Try { expression } match { - /** If the function succeeded, evaluate to x. */ - case util.Success(x) => x - /** If the function failed, either retry or throw the exception */ - case util.Failure(e) => e match { - /** Retry: Throttling or other Retryable exception has occurred */ - case _: ThrottlingException | _: KinesisClientLibDependencyException if numRetriesLeft > 1 - => { - val backOffMillis = Random.nextInt(maxBackOffMillis) - Thread.sleep(backOffMillis) - logError(s"Retryable Exception: Random backOffMillis=${backOffMillis}", e) - retry(expression, numRetriesLeft - 1, maxBackOffMillis) - } - /** Throw: Shutdown has been requested by the Kinesis Client Library.*/ - case _: ShutdownException => { - logError(s"ShutdownException: Caught shutdown exception, skipping checkpoint.", e) - throw e - } - /** Throw: Non-retryable exception has occurred with the Kinesis Client Library */ - case _: InvalidStateException => { - logError(s"InvalidStateException: Cannot save checkpoint to the DynamoDB table used" + - s" by the Amazon Kinesis Client Library. Table likely doesn't exist.", e) - throw e - } - /** Throw: Unexpected exception has occurred */ - case _ => { - logError(s"Unexpected, non-retryable exception.", e) - throw e - } - } - } - } -} diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordSerializer.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordSerializer.scala deleted file mode 100644 index b63f19a8fead8..0000000000000 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordSerializer.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.streaming.kinesis - -/** - * Convert custom types to/from Array[Byte]. 
- * @tparam type to serialize/deserialize - */ -private[streaming] trait KinesisRecordSerializer[T] extends Serializable { - /** - * Convert type to Array[Byte] - * - * @param type to serialize - * @return byte array - */ - def serialize(t: T): Array[Byte] - - /** - * Convert Array[Byte] to type - * - * @param byte array - * @return deserialized type - */ - def deserialize(array: Array[Byte]): T -} diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisStringRecordSerializer.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisStringRecordSerializer.scala deleted file mode 100644 index 4833ccd63d380..0000000000000 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisStringRecordSerializer.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.streaming.kinesis - -import org.apache.spark.Logging - -/** - * Implementation of KinesisRecordSerializer to convert Array[Byte] to/from String. - */ -class KinesisStringRecordSerializer extends KinesisRecordSerializer[String] with Logging { - /** - * Convert String to Array[Byte] - * - * @param string to serialize - * @return byte array - */ - def serialize(string: String): Array[Byte] = { - string.getBytes() - } - - /** - * Convert Array[Byte] to String - * - * @param byte array - * @return deserialized string - */ - def deserialize(array: Array[Byte]): String = { - new String(array) - } -} diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala index 2b6b833457e35..d3560f6a690fc 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala @@ -19,6 +19,7 @@ package org.apache.spark.streaming.kinesis import org.apache.spark.Logging import org.apache.spark.annotation.Experimental import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.Duration import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.api.java.JavaReceiverInputDStream import org.apache.spark.streaming.api.java.JavaStreamingContext @@ -36,52 +37,63 @@ object KinesisUtils extends Logging { /** * Create an InputDStream that pulls messages from a Kinesis stream. * - * @param ssc StreamingContext - * @param appName unique name for your Kinesis app. Multiple instances of the app pull from - * the same stream. The Kinesis Client Library coordinates all load-balancing and - * failure-recovery. - * @param stream Kinesis stream name - * @param endpoint url of Kinesis service (ie. 
https://kinesis.us-east-1.amazonaws.com) - * Available endpoints: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region - * @param checkpointIntervalMillis interval (millis) for Kinesis checkpointing - * @param initialPositionInStream in the absence of a Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. + * @param ssc StreamingContext object + * @param streamName Kinesis stream name + * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param storageLevel Storage level to use for storing the received objects + * * @return ReceiverInputDStream[Array[Byte]] */ def createStream( ssc: StreamingContext, - appName: String, - stream: String, - endpoint: String, - checkpointIntervalMillis: Long, - initialPositionInStream: InitialPositionInStream): ReceiverInputDStream[Array[Byte]] = { - ssc.receiverStream(new KinesisReceiver(appName, stream, endpoint, checkpointIntervalMillis, - initialPositionInStream)) + streamName: String, + endpointUrl: String, + checkpointInterval: Duration, + initialPositionInStream: InitialPositionInStream, + storageLevel: StorageLevel): ReceiverInputDStream[Array[Byte]] = { + ssc.receiverStream(new KinesisReceiver(ssc.sc.appName, streamName, endpointUrl, + checkpointInterval, initialPositionInStream, storageLevel)) } /** * Create a Java-friendly InputDStream that pulls messages from a Kinesis stream. * * @param jssc Java StreamingContext object - * @param appName unique name for your Kinesis app. Multiple instances of the app pull from - * the same stream. The Kinesis Client Library coordinates all load-balancing and - * failure-recovery. - * @param stream Kinesis stream name - * @param endpoint url of Kinesis service (ie. https://kinesis.us-east-1.amazonaws.com) - * Available endpoints: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region - * @param checkpointIntervalMillis interval (millis) for Kinesis checkpointing - * @param initialPositionInStream in the absence of a Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. + * @param ssc StreamingContext object + * @param streamName Kinesis stream name + * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). 
+ * @param storageLevel Storage level to use for storing the received objects + * + * @return JavaReceiverInputDStream[Array[Byte]] + * * @return JavaReceiverInputDStream[Array[Byte]] */ def createStream( jssc: JavaStreamingContext, - appName: String, - stream: String, - endpoint: String, - checkpointIntervalMillis: Long, - initialPositionInStream: InitialPositionInStream): JavaReceiverInputDStream[Array[Byte]] = { - jssc.receiverStream(new KinesisReceiver(appName, stream, endpoint, checkpointIntervalMillis, - initialPositionInStream)) + streamName: String, + endpointUrl: String, + checkpointInterval: Duration, + initialPositionInStream: InitialPositionInStream, + storageLevel: StorageLevel): JavaReceiverInputDStream[Array[Byte]] = { + jssc.receiverStream(new KinesisReceiver(jssc.ssc.sc.appName, streamName, + endpointUrl, checkpointInterval, initialPositionInStream, storageLevel)) } } diff --git a/extras/kinesis-asl/src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisStreamSuite.java b/extras/kinesis-asl/src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisStreamSuite.java new file mode 100644 index 0000000000000..87954a31f60ce --- /dev/null +++ b/extras/kinesis-asl/src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisStreamSuite.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.streaming.kinesis; + +import org.apache.spark.storage.StorageLevel; +import org.apache.spark.streaming.Duration; +import org.apache.spark.streaming.LocalJavaStreamingContext; +import org.apache.spark.streaming.api.java.JavaDStream; +import org.junit.Test; + +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; + +/** + * Demonstrate the use of the KinesisUtils Java API + */ +public class JavaKinesisStreamSuite extends LocalJavaStreamingContext { + @Test + public void testKinesisStream() { + // Tests the API, does not actually test data receiving + JavaDStream kinesisStream = KinesisUtils.createStream(ssc, "mySparkStream", + "https://kinesis.us-west-2.amazonaws.com", new Duration(2000), + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2()); + + ssc.stop(); + } +} diff --git a/extras/kinesis-asl/src/test/resources/log4j.properties b/extras/kinesis-asl/src/test/resources/log4j.properties index b4519708afdf2..b01d4482378c1 100644 --- a/extras/kinesis-asl/src/test/resources/log4j.properties +++ b/extras/kinesis-asl/src/test/resources/log4j.properties @@ -25,4 +25,3 @@ log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: # Ignore messages below warning level from Jetty, because it's a bit verbose log4j.logger.org.eclipse.jetty=WARN - diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index 3e97b2ce289c3..5db0b48113e88 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -19,12 +19,16 @@ package org.apache.spark.streaming.kinesis import java.nio.ByteBuffer import scala.collection.JavaConversions.seqAsJavaList -import scala.collection.mutable.ArrayBuffer +import org.apache.spark.annotation.Experimental +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.Milliseconds +import org.apache.spark.streaming.Seconds +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.TestSuiteBase import org.apache.spark.streaming.util.Clock import org.apache.spark.streaming.util.ManualClock import org.scalatest.BeforeAndAfter -import org.scalatest.FunSuite import org.scalatest.Matchers import org.scalatest.mock.EasyMockSugar @@ -33,13 +37,25 @@ import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibD import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason import com.amazonaws.services.kinesis.model.Record /** * Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor */ -class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter with EasyMockSugar { +class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAfter + with EasyMockSugar { + + test("kinesis input stream") { + val ssc = new StreamingContext(master, framework, batchDuration) + // Tests the API, does not actually test data receiving + 
val kinesisStream = KinesisUtils.createStream(ssc, "mySparkStream", + "https://kinesis.us-west-2.amazonaws.com", Seconds(2), + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2); + ssc.stop() + } + val app = "TestKinesisReceiver" val stream = "mySparkStream" val endpoint = "endpoint-url" @@ -51,20 +67,18 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi val record2 = new Record() record2.setData(ByteBuffer.wrap("Learning Spark".getBytes())) val batch = List[Record](record1, record2) - val expectedArrayBuffer = new ArrayBuffer[Array[Byte]]() += record1.getData().array() += - record2.getData().array() var receiverMock: KinesisReceiver = _ var checkpointerMock: IRecordProcessorCheckpointer = _ var checkpointClockMock: ManualClock = _ - var checkpointStateMock: CheckpointState = _ + var checkpointStateMock: KinesisCheckpointState = _ var currentClockMock: Clock = _ before { receiverMock = mock[KinesisReceiver] checkpointerMock = mock[IRecordProcessorCheckpointer] checkpointClockMock = mock[ManualClock] - checkpointStateMock = mock[CheckpointState] + checkpointStateMock = mock[KinesisCheckpointState] currentClockMock = mock[Clock] } @@ -72,7 +86,8 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi val expectedCheckpointIntervalMillis = 10 expecting { receiverMock.isStopped().andReturn(false).once() - receiverMock.store(expectedArrayBuffer).once() + receiverMock.store(record1.getData().array()).once() + receiverMock.store(record2.getData().array()).once() checkpointStateMock.shouldCheckpoint().andReturn(true).once() checkpointerMock.checkpoint().once() checkpointStateMock.advanceCheckpoint().once() @@ -98,7 +113,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi test("shouldn't checkpoint when exception occurs during store") { expecting { receiverMock.isStopped().andReturn(false).once() - receiverMock.store(expectedArrayBuffer).andThrow(new RuntimeException()).once() + receiverMock.store(record1.getData().array()).andThrow(new RuntimeException()).once() } whenExecuting(receiverMock, checkpointerMock, checkpointStateMock) { intercept[RuntimeException] { @@ -115,7 +130,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi } whenExecuting(currentClockMock) { val checkpointIntervalMillis = 10 - val checkpointState = new CheckpointState(checkpointIntervalMillis, currentClockMock) + val checkpointState = new KinesisCheckpointState(Milliseconds(checkpointIntervalMillis), currentClockMock) assert(checkpointState.checkpointClock.currentTime() == checkpointIntervalMillis) } } @@ -125,7 +140,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi currentClockMock.currentTime().andReturn(0).once() } whenExecuting(currentClockMock) { - val checkpointState = new CheckpointState(Long.MinValue, currentClockMock) + val checkpointState = new KinesisCheckpointState(Milliseconds(Long.MinValue), currentClockMock) assert(checkpointState.shouldCheckpoint()) } } @@ -135,7 +150,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi currentClockMock.currentTime().andReturn(0).once() } whenExecuting(currentClockMock) { - val checkpointState = new CheckpointState(Long.MaxValue, currentClockMock) + val checkpointState = new KinesisCheckpointState(Milliseconds(Long.MaxValue), currentClockMock) assert(!checkpointState.shouldCheckpoint()) } } @@ -146,7 +161,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers 
with BeforeAndAfter wi } whenExecuting(currentClockMock) { val checkpointIntervalMillis = 10 - val checkpointState = new CheckpointState(checkpointIntervalMillis, currentClockMock) + val checkpointState = new KinesisCheckpointState(Milliseconds(checkpointIntervalMillis), currentClockMock) assert(checkpointState.checkpointClock.currentTime() == checkpointIntervalMillis) checkpointState.advanceCheckpoint() assert(checkpointState.checkpointClock.currentTime() == (2 * checkpointIntervalMillis)) @@ -176,25 +191,13 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi } } - test("string record converter") { - val expectedString = "http://sparkinaction.com" - val expectedByteArray = expectedString.getBytes() - val stringRecordSerializer = new KinesisStringRecordSerializer() - - expectedByteArray should be(stringRecordSerializer.serialize(expectedString)) - - expectedString should be(stringRecordSerializer.deserialize(expectedByteArray)) - expectedString should - be(stringRecordSerializer.deserialize(stringRecordSerializer.serialize(expectedString))) - } - test("retry success on first attempt") { val expectedIsStopped = false expecting { receiverMock.isStopped().andReturn(expectedIsStopped).once() } whenExecuting(receiverMock) { - val actualVal = KinesisRecordProcessorUtils.retry(receiverMock.isStopped(), 2, 100) + val actualVal = KinesisRecordProcessor.retry(receiverMock.isStopped(), 2, 100) assert(actualVal == expectedIsStopped) } } @@ -206,7 +209,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi .andReturn(expectedIsStopped).once() } whenExecuting(receiverMock) { - val actualVal = KinesisRecordProcessorUtils.retry(receiverMock.isStopped(), 2, 100) + val actualVal = KinesisRecordProcessor.retry(receiverMock.isStopped(), 2, 100) assert(actualVal == expectedIsStopped) } } @@ -218,7 +221,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi .andReturn(expectedIsStopped).once() } whenExecuting(receiverMock) { - val actualVal = KinesisRecordProcessorUtils.retry(receiverMock.isStopped(), 2, 100) + val actualVal = KinesisRecordProcessor.retry(receiverMock.isStopped(), 2, 100) assert(actualVal == expectedIsStopped) } } @@ -229,7 +232,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi } whenExecuting(checkpointerMock) { intercept[ShutdownException] { - KinesisRecordProcessorUtils.retry(checkpointerMock.checkpoint(), 2, 100) + KinesisRecordProcessor.retry(checkpointerMock.checkpoint(), 2, 100) } } } @@ -240,7 +243,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi } whenExecuting(checkpointerMock) { intercept[InvalidStateException] { - KinesisRecordProcessorUtils.retry(checkpointerMock.checkpoint(), 2, 100) + KinesisRecordProcessor.retry(checkpointerMock.checkpoint(), 2, 100) } } } @@ -251,7 +254,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi } whenExecuting(checkpointerMock) { intercept[RuntimeException] { - KinesisRecordProcessorUtils.retry(checkpointerMock.checkpoint(), 2, 100) + KinesisRecordProcessor.retry(checkpointerMock.checkpoint(), 2, 100) } } } @@ -264,7 +267,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter wi } whenExecuting(checkpointerMock) { val exception = intercept[RuntimeException] { - KinesisRecordProcessorUtils.retry(checkpointerMock.checkpoint(), 2, 100) + KinesisRecordProcessor.retry(checkpointerMock.checkpoint(), 2, 100) } 
exception.getMessage().shouldBe(expectedErrorMessage) } diff --git a/make-distribution.sh b/make-distribution.sh index 6a50bc74022ef..0a3283ecec6f8 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -196,8 +196,6 @@ cp -r "$FWDIR/bin" "$DISTDIR" cp -r "$FWDIR/python" "$DISTDIR" cp -r "$FWDIR/sbin" "$DISTDIR" cp -r "$FWDIR/ec2" "$DISTDIR" -cp -r "$FWDIR/extras/kinesis-asl/bin" "$DISTDIR" - # Download and copy in tachyon, if requested if [ "$SPARK_TACHYON" == "true" ]; then From 691a6be900015358d55a03c046f93d6336297ea2 Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Fri, 1 Aug 2014 14:47:00 -0700 Subject: [PATCH 10/12] fixed tests and formatting, fixed a bug with JavaKinesisWordCount during union of streams --- .../streaming/JavaKinesisWordCountASL.java | 2 +- .../streaming/KinesisWordCountASL.scala | 6 +++-- .../kinesis/KinesisReceiverSuite.scala | 22 +++++++++---------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java index 31793aaa020ba..f630dcd0ab16f 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java @@ -136,7 +136,7 @@ public static void main(String[] args) { /** Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */ List> streamsList = new ArrayList>(numStreams); - for (int i = 0; i < streamsList.size(); i++) { + for (int i = 0; i < numStreams; i++) { streamsList.add( KinesisUtils.createStream(jssc, streamName, endpointUrl, checkpointInterval, InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2()) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 865eea433aeb9..8fe90dc18e471 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -94,7 +94,8 @@ object KinesisWordCountASL extends Logging { /** Determine the number of shards from the stream */ val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) kinesisClient.setEndpoint(endpointUrl) - val numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards().size() + val numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards() + .size() /** In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard. 
*/ val numStreams = numShards @@ -108,7 +109,8 @@ object KinesisWordCountASL extends Logging { /** Setup the and SparkConfig and StreamingContext */ /** Spark Streaming batch interval */ val batchInterval = Milliseconds(2000) - val sparkConfig = new SparkConf().setAppName("KinesisWordCount").setMaster(s"local[$numSparkThreads]") + val sparkConfig = new SparkConf().setAppName("KinesisWordCount") + .setMaster(s"local[$numSparkThreads]") val ssc = new StreamingContext(sparkConfig, batchInterval) /** Setup the checkpoint directory used by Spark Streaming */ ssc.checkpoint("/tmp/checkpoint"); diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index 5db0b48113e88..cafac31961103 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -46,16 +46,7 @@ import com.amazonaws.services.kinesis.model.Record */ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAfter with EasyMockSugar { - - test("kinesis input stream") { - val ssc = new StreamingContext(master, framework, batchDuration) - // Tests the API, does not actually test data receiving - val kinesisStream = KinesisUtils.createStream(ssc, "mySparkStream", - "https://kinesis.us-west-2.amazonaws.com", Seconds(2), - InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2); - ssc.stop() - } - + val app = "TestKinesisReceiver" val stream = "mySparkStream" val endpoint = "endpoint-url" @@ -74,7 +65,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft var checkpointStateMock: KinesisCheckpointState = _ var currentClockMock: Clock = _ - before { + override def beforeFunction() = { receiverMock = mock[KinesisReceiver] checkpointerMock = mock[IRecordProcessorCheckpointer] checkpointClockMock = mock[ManualClock] @@ -82,6 +73,15 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft currentClockMock = mock[Clock] } + test("kinesis utils api") { + val ssc = new StreamingContext(master, framework, batchDuration) + // Tests the API, does not actually test data receiving + val kinesisStream = KinesisUtils.createStream(ssc, "mySparkStream", + "https://kinesis.us-west-2.amazonaws.com", Seconds(2), + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2); + ssc.stop() + } + test("process records including store and checkpoint") { val expectedCheckpointIntervalMillis = 10 expecting { From 0393795b53c2789973c081dba6f7651fd8678adc Mon Sep 17 00:00:00 2001 From: Chris Fregly Date: Fri, 1 Aug 2014 19:23:04 -0700 Subject: [PATCH 11/12] moved Kinesis examples out of examples/ and back into extras/kinesis-asl updated the build to only include kinesis-asl inside the examples jar when -Pkinesis-asl is specified --- assembly/pom.xml | 10 --- examples/pom.xml | 18 ++-- .../streaming/JavaKinesisWordCountASL.java | 44 +++++----- .../streaming/KinesisWordCountASL.scala | 86 +++++++++++-------- .../kinesis/KinesisCheckpointState.scala | 5 +- .../streaming/kinesis/KinesisReceiver.scala | 12 +-- .../kinesis/KinesisRecordProcessor.scala | 36 ++++---- 7 files changed, 112 insertions(+), 99 deletions(-) rename {examples => extras/kinesis-asl}/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java (82%) rename {examples => 
extras/kinesis-asl}/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala (76%) diff --git a/assembly/pom.xml b/assembly/pom.xml index 76099b074c7ed..703f15925bc44 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -185,16 +185,6 @@ - - kinesis-asl - - - org.apache.spark - spark-streaming-kinesis-asl_${scala.binary.version} - ${project.version} - - - bigtop-dist + org.apache.spark spark-streaming-kinesis-asl_2.10 jar diff --git a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java b/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java index 647772131d293..a8b907b241893 100644 --- a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java +++ b/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java @@ -79,16 +79,12 @@ public final class JavaKinesisWordCountASL { private static final Pattern WORD_SEPARATOR = Pattern.compile(" "); private static final Logger logger = Logger.getLogger(JavaKinesisWordCountASL.class); - /* - * Make the constructor private to enforce singleton - */ + /* Make the constructor private to enforce singleton */ private JavaKinesisWordCountASL() { } public static void main(String[] args) { - /* - * Check that all required args were passed in. - */ + /* Check that all required args were passed in. */ if (args.length < 2) { System.err.println( "|Usage: KinesisWordCount \n" + @@ -131,9 +127,6 @@ public static void main(String[] args) { /* Setup the StreamingContext */ JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval); - /* Setup the checkpoint directory used by Spark Streaming */ - jssc.checkpoint("/tmp/checkpoint"); - /* Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */ List> streamsList = new ArrayList>(numStreams); for (int i = 0; i < numStreams; i++) { @@ -163,7 +156,7 @@ public Iterable call(byte[] line) { } }); - /* Map each word to a (word, 1) tuple, then reduce/aggregate by key. */ + /* Map each word to a (word, 1) tuple, then reduce/aggregate by word. */ JavaPairDStream wordCounts = words.mapToPair( new PairFunction() { @Override @@ -177,7 +170,7 @@ public Integer call(Integer i1, Integer i2) { } }); - /* Print the first 10 wordCounts by key */ + /* Print the first 10 wordCounts */ wordCounts.print(); /* Start the streaming context and await termination */ diff --git a/extras/kinesis-asl/src/main/resources/log4j.properties b/extras/kinesis-asl/src/main/resources/log4j.properties index ad789341e62c9..97348fb5b6123 100644 --- a/extras/kinesis-asl/src/main/resources/log4j.properties +++ b/extras/kinesis-asl/src/main/resources/log4j.properties @@ -15,7 +15,6 @@ # limitations under the License. 
# -# Set everything to be logged to the file streaming/target/unit-tests.log log4j.rootCategory=WARN, console # File appender @@ -35,8 +34,4 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: log4j.logger.org.eclipse.jetty=WARN log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO - -# Log all Kinesis Streaming messages -log4j.logger.org.apache.spark.examples.streaming=DEBUG -log4j.logger.org.apache.spark.streaming.kinesis=DEBUG +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO \ No newline at end of file diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 63b610ac29879..d03edf8b30a9f 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -109,8 +109,6 @@ object KinesisWordCountASL extends Logging { val sparkConfig = new SparkConf().setAppName("KinesisWordCount") .setMaster(s"local[$numSparkThreads]") val ssc = new StreamingContext(sparkConfig, batchInterval) - /* Setup the checkpoint directory used by Spark Streaming */ - ssc.checkpoint("/tmp/checkpoint"); /* Kinesis checkpoint interval. Same as batchInterval for this example. */ val kinesisCheckpointInterval = batchInterval @@ -131,7 +129,7 @@ object KinesisWordCountASL extends Logging { /* Map each word to a (word, 1) tuple so we can reduce/aggregate by key. */ val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _) - /* Print the first 10 wordCounts by key */ + /* Print the first 10 wordCounts */ wordCounts.print() /* Start the streaming context and await termination */ diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala index ba41435d2363d..8ecc2d90160b1 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -79,9 +79,7 @@ private[kinesis] class KinesisRecordProcessor( * This is not desirable, so we instead store a raw Array[Byte] and decouple * ourselves from Spark's internal serialization strategy. */ - batch.foreach(record => - KinesisRecordProcessor.retry(receiver.store(record.getData().array()), 4, 500) - ) + batch.foreach(record => receiver.store(record.getData().array())) logDebug(s"Stored: Worker $workerId stored ${batch.size} records for shardId $shardId") @@ -98,7 +96,7 @@ private[kinesis] class KinesisRecordProcessor( */ if (checkpointState.shouldCheckpoint()) { /* Perform the checkpoint */ - KinesisRecordProcessor.retry(checkpointer.checkpoint(), 4, 500) + KinesisRecordProcessor.retryRandom(checkpointer.checkpoint(), 4, 100) /* Update the next checkpoint time */ checkpointState.advanceCheckpoint() @@ -147,8 +145,8 @@ private[kinesis] class KinesisRecordProcessor( * Checkpoint to indicate that all records from the shard have been drained and processed. * It's now OK to read from the new shards that resulted from a resharding event. 
*/ - case ShutdownReason.TERMINATE => KinesisRecordProcessor.retry(checkpointer.checkpoint(), - 4, 500) + case ShutdownReason.TERMINATE => + KinesisRecordProcessor.retryRandom(checkpointer.checkpoint(), 4, 100) /* * ZOMBIE Use Case. NoOp. @@ -178,7 +176,7 @@ private[kinesis] object KinesisRecordProcessor extends Logging { * or any exception that persists after numRetriesLeft reaches 0 */ @annotation.tailrec - def retry[T](expression: => T, numRetriesLeft: Int, maxBackOffMillis: Int): T = { + def retryRandom[T](expression: => T, numRetriesLeft: Int, maxBackOffMillis: Int): T = { util.Try { expression } match { /* If the function succeeded, evaluate to x. */ case util.Success(x) => x @@ -190,7 +188,7 @@ private[kinesis] object KinesisRecordProcessor extends Logging { val backOffMillis = Random.nextInt(maxBackOffMillis) Thread.sleep(backOffMillis) logError(s"Retryable Exception: Random backOffMillis=${backOffMillis}", e) - retry(expression, numRetriesLeft - 1, maxBackOffMillis) + retryRandom(expression, numRetriesLeft - 1, maxBackOffMillis) } /* Throw: Shutdown has been requested by the Kinesis Client Library.*/ case _: ShutdownException => { diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala index d3560f6a690fc..713cac0e293c0 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala @@ -16,7 +16,6 @@ */ package org.apache.spark.streaming.kinesis -import org.apache.spark.Logging import org.apache.spark.annotation.Experimental import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.Duration @@ -33,7 +32,7 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionIn * :: Experimental :: */ @Experimental -object KinesisUtils extends Logging { +object KinesisUtils { /** * Create an InputDStream that pulls messages from a Kinesis stream. * @@ -82,8 +81,6 @@ object KinesisUtils extends Logging { * the tip of the stream (InitialPositionInStream.LATEST). * @param storageLevel Storage level to use for storing the received objects * - * @return JavaReceiverInputDStream[Array[Byte]] - * * @return JavaReceiverInputDStream[Array[Byte]] */ def createStream( diff --git a/extras/kinesis-asl/src/test/resources/log4j.properties b/extras/kinesis-asl/src/test/resources/log4j.properties index b01d4482378c1..e01e049595475 100644 --- a/extras/kinesis-asl/src/test/resources/log4j.properties +++ b/extras/kinesis-asl/src/test/resources/log4j.properties @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -# Set everything to be logged to the file streaming/target/unit-tests.log log4j.rootCategory=INFO, file # log4j.appender.file=org.apache.log4j.FileAppender log4j.appender.file=org.apache.log4j.FileAppender diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index cafac31961103..41dbd64c2b1fa 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -197,7 +197,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft receiverMock.isStopped().andReturn(expectedIsStopped).once() } whenExecuting(receiverMock) { - val actualVal = KinesisRecordProcessor.retry(receiverMock.isStopped(), 2, 100) + val actualVal = KinesisRecordProcessor.retryRandom(receiverMock.isStopped(), 2, 100) assert(actualVal == expectedIsStopped) } } @@ -209,7 +209,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft .andReturn(expectedIsStopped).once() } whenExecuting(receiverMock) { - val actualVal = KinesisRecordProcessor.retry(receiverMock.isStopped(), 2, 100) + val actualVal = KinesisRecordProcessor.retryRandom(receiverMock.isStopped(), 2, 100) assert(actualVal == expectedIsStopped) } } @@ -221,7 +221,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft .andReturn(expectedIsStopped).once() } whenExecuting(receiverMock) { - val actualVal = KinesisRecordProcessor.retry(receiverMock.isStopped(), 2, 100) + val actualVal = KinesisRecordProcessor.retryRandom(receiverMock.isStopped(), 2, 100) assert(actualVal == expectedIsStopped) } } @@ -232,7 +232,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft } whenExecuting(checkpointerMock) { intercept[ShutdownException] { - KinesisRecordProcessor.retry(checkpointerMock.checkpoint(), 2, 100) + KinesisRecordProcessor.retryRandom(checkpointerMock.checkpoint(), 2, 100) } } } @@ -243,7 +243,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft } whenExecuting(checkpointerMock) { intercept[InvalidStateException] { - KinesisRecordProcessor.retry(checkpointerMock.checkpoint(), 2, 100) + KinesisRecordProcessor.retryRandom(checkpointerMock.checkpoint(), 2, 100) } } } @@ -254,7 +254,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft } whenExecuting(checkpointerMock) { intercept[RuntimeException] { - KinesisRecordProcessor.retry(checkpointerMock.checkpoint(), 2, 100) + KinesisRecordProcessor.retryRandom(checkpointerMock.checkpoint(), 2, 100) } } } @@ -267,7 +267,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft } whenExecuting(checkpointerMock) { val exception = intercept[RuntimeException] { - KinesisRecordProcessor.retry(checkpointerMock.checkpoint(), 2, 100) + KinesisRecordProcessor.retryRandom(checkpointerMock.checkpoint(), 2, 100) } exception.getMessage().shouldBe(expectedErrorMessage) }