From 77409967008415b7ea57e9349d296350e6519687 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Wed, 6 May 2015 14:48:25 -0700 Subject: [PATCH 01/48] [HOT-FIX] Move HiveWindowFunctionQuerySuite.scala to hive compatibility dir. Author: Yin Huai Closes #5951 from yhuai/fixBuildMaven and squashes the following commits: fdde183 [Yin Huai] Move HiveWindowFunctionQuerySuite.scala to hive compatibility dir. --- .../spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sql/hive/{ => compatibility}/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala (100%) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala similarity index 100% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala rename to sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala From 1ad04dae038673a448f529c39b17817b78d6acd0 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Wed, 6 May 2015 16:15:51 -0700 Subject: [PATCH 02/48] [SPARK-5995] [ML] Make Prediction dev API public Changes: * Update protected prediction methods, following design doc. **<--most interesting change** * Changed abstract classes for Estimator and Model to be public. Added DeveloperApi tag. (I kept the traits for Estimator/Model Params private.) * Changed ProbabilisticClassificationModel method names to use probability instead of probabilities. CC: mengxr shivaram etrain Author: Joseph K. Bradley Closes #5913 from jkbradley/public-dev-api and squashes the following commits: e9aa0ea [Joseph K. Bradley] moved findMax to DenseVector and renamed to argmax. fixed bug for vector of length 0 15b9957 [Joseph K. Bradley] renamed probabilities to probability in method names 5cda84d [Joseph K. Bradley] regenerated sharedParams 7d1877a [Joseph K. Bradley] Made spark.ml prediction abstractions public. Organized their prediction methods for efficient computation of multiple output columns. 
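For reference, a standalone sketch of two behaviors this patch introduces: the `DenseVector.argmax` helper (first maximal index wins ties; -1 for a length-0 vector) and the log-odds thresholding behind `LogisticRegressionModel.raw2prediction`. Plain Scala with no Spark dependencies; the object name `PredictionSketch` and the sample values are hypothetical:

```scala
object PredictionSketch extends App {

  // Mirror of the DenseVector.argmax added in Vectors.scala below:
  // the first maximal element wins ties; a length-0 input yields -1.
  def argmax(values: Array[Double]): Int = {
    if (values.isEmpty) {
      -1
    } else {
      var maxIdx = 0
      var maxValue = values(0)
      var i = 1
      while (i < values.length) {
        if (values(i) > maxValue) {
          maxIdx = i
          maxValue = values(i)
        }
        i += 1
      }
      maxIdx
    }
  }
  assert(argmax(Array(0.3, 0.9, 0.9, 0.1)) == 1) // first max wins the tie
  assert(argmax(Array.empty[Double]) == -1)      // the length-0 case

  // Thresholding the sigmoid of the margin at t is equivalent to thresholding
  // the raw margin at log(t / (1 - t)), so raw2prediction can skip the sigmoid.
  def sigmoid(m: Double): Double = 1.0 / (1.0 + math.exp(-m))
  val t = 0.7
  val rawThreshold = math.log(t / (1.0 - t))
  for (m <- Seq(-2.0, 0.0, 0.5, 1.0, 2.0)) {
    assert((sigmoid(m) > t) == (m > rawThreshold))
  }
  println("argmax and threshold checks passed")
}
```

This equivalence is what lets the reorganized prediction methods compute only the requested output columns without redundant work.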
--- .../ml/{impl/estimator => }/Predictor.scala | 50 ++------ .../spark/ml/classification/Classifier.scala | 110 ++++++------------ .../DecisionTreeClassifier.scala | 5 +- .../ml/classification/GBTClassifier.scala | 5 +- .../classification/LogisticRegression.scala | 100 ++++++---------- .../ProbabilisticClassifier.scala | 100 +++++++++++----- .../RandomForestClassifier.scala | 5 +- .../ml/param/shared/SharedParamsCodeGen.scala | 6 +- .../spark/ml/param/shared/sharedParams.scala | 4 +- .../ml/regression/DecisionTreeRegressor.scala | 5 +- .../spark/ml/regression/GBTRegressor.scala | 5 +- .../ml/regression/LinearRegression.scala | 5 +- .../ml/regression/RandomForestRegressor.scala | 5 +- .../spark/ml/regression/Regressor.scala | 42 ++----- .../spark/ml/{impl => }/tree/treeParams.scala | 4 +- .../apache/spark/mllib/linalg/Vectors.scala | 22 ++++ 16 files changed, 206 insertions(+), 267 deletions(-) rename mllib/src/main/scala/org/apache/spark/ml/{impl/estimator => }/Predictor.scala (86%) rename mllib/src/main/scala/org/apache/spark/ml/{impl => }/tree/treeParams.scala (99%) diff --git a/mllib/src/main/scala/org/apache/spark/ml/impl/estimator/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala similarity index 86% rename from mllib/src/main/scala/org/apache/spark/ml/impl/estimator/Predictor.scala rename to mllib/src/main/scala/org/apache/spark/ml/Predictor.scala index e8b3628140e99..0e53877de92db 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/impl/estimator/Predictor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala @@ -15,29 +15,23 @@ * limitations under the License. */ -package org.apache.spark.ml.impl.estimator +package org.apache.spark.ml -import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} -import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DataType, DoubleType, StructType} +import org.apache.spark.sql.{DataFrame, Row} /** - * :: DeveloperApi :: - * - * Trait for parameters for prediction (regression and classification). - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. + * (private[ml]) Trait for parameters for prediction (regression and classification). */ -@DeveloperApi -private[spark] trait PredictorParams extends Params +private[ml] trait PredictorParams extends Params with HasLabelCol with HasFeaturesCol with HasPredictionCol { /** @@ -63,7 +57,7 @@ private[spark] trait PredictorParams extends Params } /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Abstraction for prediction problems (regression and classification). * @@ -73,11 +67,9 @@ private[spark] trait PredictorParams extends Params * parameter to specify the concrete type. * @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type * parameter to specify the concrete type for the corresponding model. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. 
*/ -@AlphaComponent -private[spark] abstract class Predictor[ +@DeveloperApi +abstract class Predictor[ FeaturesType, Learner <: Predictor[FeaturesType, Learner, M], M <: PredictionModel[FeaturesType, M]] @@ -104,8 +96,6 @@ private[spark] abstract class Predictor[ } /** - * :: DeveloperApi :: - * * Train a model using the given dataset and parameters. * Developers can implement this instead of [[fit()]] to avoid dealing with schema validation * and copying parameters into the model. @@ -113,12 +103,9 @@ private[spark] abstract class Predictor[ * @param dataset Training dataset * @return Fitted model */ - @DeveloperApi protected def train(dataset: DataFrame): M /** - * :: DeveloperApi :: - * * Returns the SQL DataType corresponding to the FeaturesType type parameter. * * This is used by [[validateAndTransformSchema()]]. @@ -126,7 +113,6 @@ private[spark] abstract class Predictor[ * * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector. */ - @DeveloperApi protected def featuresDataType: DataType = new VectorUDT override def transformSchema(schema: StructType): StructType = { @@ -146,7 +132,7 @@ private[spark] abstract class Predictor[ } /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Abstraction for a model for prediction tasks (regression and classification). * @@ -154,11 +140,9 @@ private[spark] abstract class Predictor[ * E.g., [[org.apache.spark.mllib.linalg.VectorUDT]] for vector features. * @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type * parameter to specify the concrete type for the corresponding model. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent -private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]] +@DeveloperApi +abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]] extends Model[M] with PredictorParams { /** @group setParam */ @@ -168,8 +152,6 @@ private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel def setPredictionCol(value: String): M = set(predictionCol, value).asInstanceOf[M] /** - * :: DeveloperApi :: - * * Returns the SQL DataType corresponding to the FeaturesType type parameter. * * This is used by [[validateAndTransformSchema()]]. @@ -177,7 +159,6 @@ private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel * * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector. */ - @DeveloperApi protected def featuresDataType: DataType = new VectorUDT override def transformSchema(schema: StructType): StructType = { @@ -192,12 +173,8 @@ private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel * @return transformed dataset with [[predictionCol]] of type [[Double]] */ override def transform(dataset: DataFrame): DataFrame = { - // This default implementation should be overridden as needed. - - // Check schema transformSchema(dataset.schema, logging = true) - - if ($(predictionCol) != "") { + if ($(predictionCol).nonEmpty) { dataset.withColumn($(predictionCol), callUDF(predict _, DoubleType, col($(featuresCol)))) } else { this.logWarning(s"$uid: Predictor.transform() was called as NOOP" + @@ -207,11 +184,8 @@ private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel } /** - * :: DeveloperApi :: - * * Predict label for the given features. * This internal method is used to implement [[transform()]] and output [[predictionCol]]. 
*/ - @DeveloperApi protected def predict(features: FeaturesType): Double } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala index d3361e24705c8..263d580fe2dd3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala @@ -17,8 +17,8 @@ package org.apache.spark.ml.classification -import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor, PredictorParams} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.ml.{PredictionModel, PredictorParams, Predictor} import org.apache.spark.ml.param.shared.HasRawPredictionCol import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.linalg.{Vector, VectorUDT} @@ -26,15 +26,12 @@ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DataType, DoubleType, StructType} + /** - * :: DeveloperApi :: - * Params for classification. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. + * (private[spark]) Params for classification. */ -@DeveloperApi -private[spark] trait ClassifierParams extends PredictorParams - with HasRawPredictionCol { +private[spark] trait ClassifierParams + extends PredictorParams with HasRawPredictionCol { override protected def validateAndTransformSchema( schema: StructType, @@ -46,23 +43,21 @@ private[spark] trait ClassifierParams extends PredictorParams } /** - * :: AlphaComponent :: + * :: DeveloperApi :: + * * Single-label binary or multiclass classification. * Classes are indexed {0, 1, ..., numClasses - 1}. * * @tparam FeaturesType Type of input features. E.g., [[Vector]] * @tparam E Concrete Estimator type * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent -private[spark] abstract class Classifier[ +@DeveloperApi +abstract class Classifier[ FeaturesType, E <: Classifier[FeaturesType, E, M], M <: ClassificationModel[FeaturesType, M]] - extends Predictor[FeaturesType, E, M] - with ClassifierParams { + extends Predictor[FeaturesType, E, M] with ClassifierParams { /** @group setParam */ def setRawPredictionCol(value: String): E = set(rawPredictionCol, value).asInstanceOf[E] @@ -71,17 +66,15 @@ private[spark] abstract class Classifier[ } /** - * :: AlphaComponent :: + * :: DeveloperApi :: + * * Model produced by a [[Classifier]]. * Classes are indexed {0, 1, ..., numClasses - 1}. * * @tparam FeaturesType Type of input features. E.g., [[Vector]] * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent -private[spark] +@DeveloperApi abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[FeaturesType, M]] extends PredictionModel[FeaturesType, M] with ClassifierParams { @@ -101,13 +94,27 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur * @return transformed dataset */ override def transform(dataset: DataFrame): DataFrame = { - // This default implementation should be overridden as needed. 
- - // Check schema transformSchema(dataset.schema, logging = true) - val (numColsOutput, outputData) = - ClassificationModel.transformColumnsImpl[FeaturesType](dataset, this) + // Output selected columns only. + // This is a bit complicated since it tries to avoid repeated computation. + var outputData = dataset + var numColsOutput = 0 + if (getRawPredictionCol != "") { + outputData = outputData.withColumn(getRawPredictionCol, + callUDF(predictRaw _, new VectorUDT, col(getFeaturesCol))) + numColsOutput += 1 + } + if (getPredictionCol != "") { + val predUDF = if (getRawPredictionCol != "") { + callUDF(raw2prediction _, DoubleType, col(getRawPredictionCol)) + } else { + callUDF(predict _, DoubleType, col(getFeaturesCol)) + } + outputData = outputData.withColumn(getPredictionCol, predUDF) + numColsOutput += 1 + } + if (numColsOutput == 0) { logWarning(s"$uid: ClassificationModel.transform() was called as NOOP" + " since no output columns were set.") @@ -116,22 +123,17 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur } /** - * :: DeveloperApi :: - * * Predict label for the given features. * This internal method is used to implement [[transform()]] and output [[predictionCol]]. * * This default implementation for classification predicts the index of the maximum value * from [[predictRaw()]]. */ - @DeveloperApi override protected def predict(features: FeaturesType): Double = { - predictRaw(features).toArray.zipWithIndex.maxBy(_._1)._2 + raw2prediction(predictRaw(features)) } /** - * :: DeveloperApi :: - * * Raw prediction for each possible label. * The meaning of a "raw" prediction may vary between algorithms, but it intuitively gives * a measure of confidence in each possible label (where larger = more confident). @@ -141,48 +143,12 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur * This raw prediction may be any real number, where a larger value indicates greater * confidence for that label. */ - @DeveloperApi protected def predictRaw(features: FeaturesType): Vector -} - -private[ml] object ClassificationModel { /** - * Added prediction column(s). This is separated from [[ClassificationModel.transform()]] - * since it is used by [[org.apache.spark.ml.classification.ProbabilisticClassificationModel]]. - * @param dataset Input dataset - * @return (number of columns added, transformed dataset) + * Given a vector of raw predictions, select the predicted label. + * This may be overridden to support thresholds which favor particular labels. + * @return predicted label */ - def transformColumnsImpl[FeaturesType]( - dataset: DataFrame, - model: ClassificationModel[FeaturesType, _]): (Int, DataFrame) = { - - // Output selected columns only. - // This is a bit complicated since it tries to avoid repeated computation. 
- var tmpData = dataset - var numColsOutput = 0 - if (model.getRawPredictionCol != "") { - // output raw prediction - val features2raw: FeaturesType => Vector = model.predictRaw - tmpData = tmpData.withColumn(model.getRawPredictionCol, - callUDF(features2raw, new VectorUDT, col(model.getFeaturesCol))) - numColsOutput += 1 - if (model.getPredictionCol != "") { - val raw2pred: Vector => Double = (rawPred) => { - rawPred.toArray.zipWithIndex.maxBy(_._1)._2 - } - tmpData = tmpData.withColumn(model.getPredictionCol, - callUDF(raw2pred, DoubleType, col(model.getRawPredictionCol))) - numColsOutput += 1 - } - } else if (model.getPredictionCol != "") { - // output prediction - val features2pred: FeaturesType => Double = model.predict - tmpData = tmpData.withColumn(model.getPredictionCol, - callUDF(features2pred, DoubleType, col(model.getFeaturesCol))) - numColsOutput += 1 - } - (numColsOutput, tmpData) - } - + protected def raw2prediction(rawPrediction: Vector): Double = rawPrediction.toDense.argmax } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index 419e5ba05d38a..dcebea1d4b015 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -18,10 +18,9 @@ package org.apache.spark.ml.classification import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree._ +import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.ParamMap -import org.apache.spark.ml.tree.{DecisionTreeModel, Node} +import org.apache.spark.ml.tree.{TreeClassifierParams, DecisionTreeParams, DecisionTreeModel, Node} import org.apache.spark.ml.util.MetadataUtils import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 534ea95b1c538..ae51b05a0c42d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -21,11 +21,10 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.Logging import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree._ +import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.{Param, ParamMap} import org.apache.spark.ml.regression.DecisionTreeRegressionModel -import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel} +import org.apache.spark.ml.tree.{GBTParams, TreeClassifierParams, DecisionTreeModel, TreeEnsembleModel} import org.apache.spark.ml.util.MetadataUtils import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index b73be035e29b5..550369d18cfec 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -21,9 +21,8 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS -import org.apache.spark.mllib.linalg.{BLAS, Vector, VectorUDT, Vectors} +import org.apache.spark.mllib.linalg._ import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.functions._ import org.apache.spark.storage.StorageLevel /** @@ -99,76 +98,17 @@ class LogisticRegressionModel private[ml] ( /** @group setParam */ def setThreshold(value: Double): this.type = set(threshold, value) + /** Margin (rawPrediction) for class label 1. For binary classification only. */ private val margin: Vector => Double = (features) => { BLAS.dot(features, weights) + intercept } + /** Score (probability) for class label 1. For binary classification only. */ private val score: Vector => Double = (features) => { val m = margin(features) 1.0 / (1.0 + math.exp(-m)) } - override def transform(dataset: DataFrame): DataFrame = { - // This is overridden (a) to be more efficient (avoiding re-computing values when creating - // multiple output columns) and (b) to handle threshold, which the abstractions do not use. - // TODO: We should abstract away the steps defined by UDFs below so that the abstractions - // can call whichever UDFs are needed to create the output columns. - - // Check schema - transformSchema(dataset.schema, logging = true) - - // Output selected columns only. - // This is a bit complicated since it tries to avoid repeated computation. - // rawPrediction (-margin, margin) - // probability (1.0-score, score) - // prediction (max margin) - var tmpData = dataset - var numColsOutput = 0 - if ($(rawPredictionCol) != "") { - val features2raw: Vector => Vector = (features) => predictRaw(features) - tmpData = tmpData.withColumn($(rawPredictionCol), - callUDF(features2raw, new VectorUDT, col($(featuresCol)))) - numColsOutput += 1 - } - if ($(probabilityCol) != "") { - if ($(rawPredictionCol) != "") { - val raw2prob = udf { (rawPreds: Vector) => - val prob1 = 1.0 / (1.0 + math.exp(-rawPreds(1))) - Vectors.dense(1.0 - prob1, prob1): Vector - } - tmpData = tmpData.withColumn($(probabilityCol), raw2prob(col($(rawPredictionCol)))) - } else { - val features2prob = udf { (features: Vector) => predictProbabilities(features) : Vector } - tmpData = tmpData.withColumn($(probabilityCol), features2prob(col($(featuresCol)))) - } - numColsOutput += 1 - } - if ($(predictionCol) != "") { - val t = $(threshold) - if ($(probabilityCol) != "") { - val predict = udf { probs: Vector => - if (probs(1) > t) 1.0 else 0.0 - } - tmpData = tmpData.withColumn($(predictionCol), predict(col($(probabilityCol)))) - } else if ($(rawPredictionCol) != "") { - val predict = udf { rawPreds: Vector => - val prob1 = 1.0 / (1.0 + math.exp(-rawPreds(1))) - if (prob1 > t) 1.0 else 0.0 - } - tmpData = tmpData.withColumn($(predictionCol), predict(col($(rawPredictionCol)))) - } else { - val predict = udf { features: Vector => this.predict(features) } - tmpData = tmpData.withColumn($(predictionCol), predict(col($(featuresCol)))) - } - numColsOutput += 1 - } - if (numColsOutput == 0) { - this.logWarning(s"$uid: LogisticRegressionModel.transform() was called as NOOP" + - " since no output columns were set.") - } - tmpData - } - override val numClasses: Int = 2 /** @@ -179,17 +119,43 @@ class LogisticRegressionModel private[ml] ( if (score(features) > 
getThreshold) 1 else 0 } - override protected def predictProbabilities(features: Vector): Vector = { - val s = score(features) - Vectors.dense(1.0 - s, s) + override protected def raw2probabilityInPlace(rawPrediction: Vector): Vector = { + rawPrediction match { + case dv: DenseVector => + var i = 0 + while (i < dv.size) { + dv.values(i) = 1.0 / (1.0 + math.exp(-dv.values(i))) + i += 1 + } + dv + case sv: SparseVector => + throw new RuntimeException("Unexpected error in LogisticRegressionModel:" + + " raw2probabilitiesInPlace encountered SparseVector") + } } override protected def predictRaw(features: Vector): Vector = { val m = margin(features) - Vectors.dense(0.0, m) + Vectors.dense(-m, m) } override def copy(extra: ParamMap): LogisticRegressionModel = { copyValues(new LogisticRegressionModel(parent, weights, intercept), extra) } + + override protected def raw2prediction(rawPrediction: Vector): Double = { + val t = getThreshold + val rawThreshold = if (t == 0.0) { + Double.NegativeInfinity + } else if (t == 1.0) { + Double.PositiveInfinity + } else { + Math.log(t / (1.0 - t)) + } + if (rawPrediction(1) > rawThreshold) 1 else 0 + } + + override protected def probability2prediction(probability: Vector): Double = { + if (probability(1) > getThreshold) 1 else 0 + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala index 8519841c5c26c..330ae2938f4e0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala @@ -17,16 +17,16 @@ package org.apache.spark.ml.classification -import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{DataType, StructType} +import org.apache.spark.sql.types.{DoubleType, DataType, StructType} /** - * Params for probabilistic classification. + * (private[classification]) Params for probabilistic classification. */ private[classification] trait ProbabilisticClassifierParams extends ClassifierParams with HasProbabilityCol { @@ -42,17 +42,15 @@ private[classification] trait ProbabilisticClassifierParams /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Single-label binary or multiclass classifier which can output class conditional probabilities. * * @tparam FeaturesType Type of input features. E.g., [[Vector]] * @tparam E Concrete Estimator type * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent +@DeveloperApi private[spark] abstract class ProbabilisticClassifier[ FeaturesType, E <: ProbabilisticClassifier[FeaturesType, E, M], @@ -65,17 +63,15 @@ private[spark] abstract class ProbabilisticClassifier[ /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Model produced by a [[ProbabilisticClassifier]]. * Classes are indexed {0, 1, ..., numClasses - 1}. * * @tparam FeaturesType Type of input features. E.g., [[Vector]] * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. 
*/ -@AlphaComponent +@DeveloperApi private[spark] abstract class ProbabilisticClassificationModel[ FeaturesType, M <: ProbabilisticClassificationModel[FeaturesType, M]] @@ -95,39 +91,79 @@ private[spark] abstract class ProbabilisticClassificationModel[ * @return transformed dataset */ override def transform(dataset: DataFrame): DataFrame = { - // This default implementation should be overridden as needed. - - // Check schema transformSchema(dataset.schema, logging = true) - val (numColsOutput, outputData) = - ClassificationModel.transformColumnsImpl[FeaturesType](dataset, this) - // Output selected columns only. - if ($(probabilityCol) != "") { - // output probabilities - outputData.withColumn($(probabilityCol), - callUDF(predictProbabilities _, new VectorUDT, col($(featuresCol)))) - } else { - if (numColsOutput == 0) { - this.logWarning(s"$uid: ProbabilisticClassificationModel.transform() was called as NOOP" + - " since no output columns were set.") + // This is a bit complicated since it tries to avoid repeated computation. + var outputData = dataset + var numColsOutput = 0 + if ($(rawPredictionCol).nonEmpty) { + outputData = outputData.withColumn(getRawPredictionCol, + callUDF(predictRaw _, new VectorUDT, col(getFeaturesCol))) + numColsOutput += 1 + } + if ($(probabilityCol).nonEmpty) { + val probUDF = if ($(rawPredictionCol).nonEmpty) { + callUDF(raw2probability _, new VectorUDT, col($(rawPredictionCol))) + } else { + callUDF(predictProbability _, new VectorUDT, col($(featuresCol))) + } + outputData = outputData.withColumn($(probabilityCol), probUDF) + numColsOutput += 1 + } + if ($(predictionCol).nonEmpty) { + val predUDF = if ($(rawPredictionCol).nonEmpty) { + callUDF(raw2prediction _, DoubleType, col($(rawPredictionCol))) + } else if ($(probabilityCol).nonEmpty) { + callUDF(probability2prediction _, DoubleType, col($(probabilityCol))) + } else { + callUDF(predict _, DoubleType, col($(featuresCol))) } - outputData + outputData = outputData.withColumn($(predictionCol), predUDF) + numColsOutput += 1 + } + + if (numColsOutput == 0) { + this.logWarning(s"$uid: ProbabilisticClassificationModel.transform() was called as NOOP" + + " since no output columns were set.") } + outputData } /** - * :: DeveloperApi :: + * Estimate the probability of each class given the raw prediction, + * doing the computation in-place. + * These predictions are also called class conditional probabilities. + * + * This internal method is used to implement [[transform()]] and output [[probabilityCol]]. * + * @return Estimated class conditional probabilities (modified input vector) + */ + protected def raw2probabilityInPlace(rawPrediction: Vector): Vector + + /** Non-in-place version of [[raw2probabilityInPlace()]] */ + protected def raw2probability(rawPrediction: Vector): Vector = { + val probs = rawPrediction.copy + raw2probabilityInPlace(probs) + } + + /** * Predict the probability of each class given the features. * These predictions are also called class conditional probabilities. * - * WARNING: Not all models output well-calibrated probability estimates! These probabilities - * should be treated as confidences, not precise probabilities. - * * This internal method is used to implement [[transform()]] and output [[probabilityCol]]. 
+ * + * @return Estimated class conditional probabilities + */ + protected def predictProbability(features: FeaturesType): Vector = { + val rawPreds = predictRaw(features) + raw2probabilityInPlace(rawPreds) + } + + /** + * Given a vector of class conditional probabilities, select the predicted label. + * This may be overridden to support thresholds which favor particular labels. + * @return predicted label */ - @DeveloperApi - protected def predictProbabilities(features: FeaturesType): Vector + protected def probability2prediction(probability: Vector): Double = probability.toDense.argmax } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala index 17f59bb42e129..9954893f14359 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala @@ -20,10 +20,9 @@ package org.apache.spark.ml.classification import scala.collection.mutable import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree._ +import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.ParamMap -import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel} +import org.apache.spark.ml.tree.{RandomForestParams, TreeClassifierParams, DecisionTreeModel, TreeEnsembleModel} import org.apache.spark.ml.util.MetadataUtils import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index d379172e0bf53..0e1ff97a8bf60 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -40,8 +40,10 @@ private[shared] object SharedParamsCodeGen { ParamDesc[String]("predictionCol", "prediction column name", Some("\"prediction\"")), ParamDesc[String]("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", Some("\"rawPrediction\"")), - ParamDesc[String]("probabilityCol", - "column name for predicted class conditional probabilities", Some("\"probability\"")), + ParamDesc[String]("probabilityCol", "Column name for predicted class conditional" + + " probabilities. Note: Not all models output well-calibrated probability estimates!" + + " These probabilities should be treated as confidences, not precise probabilities.", + Some("\"probability\"")), ParamDesc[Double]("threshold", "threshold in binary classification prediction, in range [0, 1]", isValid = "ParamValidators.inRange(0, 1)"), diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index fb1874ccfc8dc..87f86807c3c91 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -128,10 +128,10 @@ private[ml] trait HasRawPredictionCol extends Params { private[ml] trait HasProbabilityCol extends Params { /** - * Param for column name for predicted class conditional probabilities. + * Param for Column name for predicted class conditional probabilities. 
Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.. * @group param */ - final val probabilityCol: Param[String] = new Param[String](this, "probabilityCol", "column name for predicted class conditional probabilities") + final val probabilityCol: Param[String] = new Param[String](this, "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.") setDefault(probabilityCol, "probability") diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index b07c26fe79b36..f8f0b161a4812 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -18,10 +18,9 @@ package org.apache.spark.ml.regression import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree._ +import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.ParamMap -import org.apache.spark.ml.tree.{DecisionTreeModel, Node} +import org.apache.spark.ml.tree.{TreeRegressorParams, DecisionTreeParams, DecisionTreeModel, Node} import org.apache.spark.ml.util.MetadataUtils import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index bc796958e4545..461905c12701a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -21,10 +21,9 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.Logging import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree._ +import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.{Param, ParamMap} -import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel} +import org.apache.spark.ml.tree.{GBTParams, TreeRegressorParams, DecisionTreeModel, TreeEnsembleModel} import org.apache.spark.ml.util.MetadataUtils import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 66c475f2d9840..e63c9a3eead52 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -25,6 +25,7 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, import org.apache.spark.Logging import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared.{HasElasticNetParam, HasMaxIter, HasRegParam, HasTol} import org.apache.spark.mllib.linalg.{Vector, Vectors} @@ -39,7 +40,7 @@ import 
org.apache.spark.util.StatCounter /** * Params for linear regression. */ -private[regression] trait LinearRegressionParams extends RegressorParams +private[regression] trait LinearRegressionParams extends PredictorParams with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol /** @@ -240,7 +241,7 @@ class LinearRegressionModel private[ml] ( * + \bar{y} / \hat{y}||^2 * = 1/2n ||\sum_i w_i^\prime x_i - y / \hat{y} + offset||^2 = 1/2n diff^2 * }}} - * where w_i^\prime is the effective weights defined by w_i/\hat{x_i}, offset is + * where w_i^\prime^ is the effective weights defined by w_i/\hat{x_i}, offset is * {{{ * - \sum_i (w_i/\hat{x_i})\bar{x_i} + \bar{y} / \hat{y}. * }}}, and diff is diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index 0468a1be1ba74..dbc628927433d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -18,10 +18,9 @@ package org.apache.spark.ml.regression import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree.{RandomForestParams, TreeRegressorParams} +import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.ParamMap -import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel} +import org.apache.spark.ml.tree.{RandomForestParams, TreeRegressorParams, DecisionTreeModel, TreeEnsembleModel} import org.apache.spark.ml.util.MetadataUtils import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala index c6b3327db6ad3..c72ef29680329 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala @@ -17,62 +17,40 @@ package org.apache.spark.ml.regression -import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor, PredictorParams} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.ml.{PredictionModel, PredictorParams, Predictor} -/** - * :: DeveloperApi :: - * Params for regression. - * Currently empty, but may add functionality later. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. - */ -@DeveloperApi -private[spark] trait RegressorParams extends PredictorParams /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Single-label regression * * @tparam FeaturesType Type of input features. E.g., [[org.apache.spark.mllib.linalg.Vector]] * @tparam Learner Concrete Estimator type * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. 
*/ -@AlphaComponent +@DeveloperApi private[spark] abstract class Regressor[ FeaturesType, Learner <: Regressor[FeaturesType, Learner, M], M <: RegressionModel[FeaturesType, M]] - extends Predictor[FeaturesType, Learner, M] - with RegressorParams { + extends Predictor[FeaturesType, Learner, M] with PredictorParams { // TODO: defaultEvaluator (follow-up PR) } /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Model produced by a [[Regressor]]. * * @tparam FeaturesType Type of input features. E.g., [[org.apache.spark.mllib.linalg.Vector]] * @tparam M Concrete Model type. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent -private[spark] abstract class RegressionModel[FeaturesType, M <: RegressionModel[FeaturesType, M]] - extends PredictionModel[FeaturesType, M] with RegressorParams { - - /** - * :: DeveloperApi :: - * - * Predict real-valued label for the given features. - * This internal method is used to implement [[transform()]] and output [[predictionCol]]. - */ - @DeveloperApi - protected def predict(features: FeaturesType): Double +@DeveloperApi +abstract class RegressionModel[FeaturesType, M <: RegressionModel[FeaturesType, M]] + extends PredictionModel[FeaturesType, M] with PredictorParams { + // TODO: defaultEvaluator (follow-up PR) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/impl/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala similarity index 99% rename from mllib/src/main/scala/org/apache/spark/ml/impl/tree/treeParams.scala rename to mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala index 0e225627d4ee3..816fcedf2efb3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/impl/tree/treeParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package org.apache.spark.ml.impl.tree +package org.apache.spark.ml.tree import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.ml.impl.estimator.PredictorParams +import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasMaxIter, HasSeed} import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, BoostingStrategy => OldBoostingStrategy, Strategy => OldStrategy} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 188d1e542b5b5..f6bcdf83cd337 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -587,6 +587,28 @@ class DenseVector(val values: Array[Double]) extends Vector { } new SparseVector(size, ii, vv) } + + /** + * Find the index of a maximal element. Returns the first maximal element in case of a tie. + * Returns -1 if vector has length 0. 
+ */ + private[spark] def argmax: Int = { + if (size == 0) { + -1 + } else { + var maxIdx = 0 + var maxValue = values(0) + var i = 1 + while (i < size) { + if (values(i) > maxValue) { + maxIdx = i + maxValue = values(i) + } + i += 1 + } + maxIdx + } + } } object DenseVector { From fbf1f342a02af65f69e0ee770a2b983c69e7c089 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 May 2015 17:08:39 -0700 Subject: [PATCH 03/48] [HOT FIX] [SPARK-7418] Ignore flaky SparkSubmitUtilsSuite test --- .../scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 2df2597e058cd..da9578478bed9 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -104,7 +104,7 @@ class SparkSubmitUtilsSuite extends FunSuite with BeforeAndAfterAll { assert(jarPath.indexOf(ivyPath) >= 0, "should use non-default ivy path") } - test("search for artifact at other repositories") { + ignore("search for artifact at other repositories") { val path = SparkSubmitUtils.resolveMavenCoordinates("com.agimatec:agimatec-validation:0.9.3", Option("https://oss.sonatype.org/content/repositories/agimatec/"), None, true) assert(path.indexOf("agimatec-validation") >= 0, "should find package. If it doesn't, check" + From 4e930420c19ae7773b138dfc7db8fc03b4660251 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Wed, 6 May 2015 17:28:11 -0700 Subject: [PATCH 04/48] [SPARK-6799] [SPARKR] Remove SparkR RDD examples, add dataframe examples This PR also makes some of the DataFrame to RDD methods private as the RDD class is private in 1.4 cc rxin pwendell Author: Shivaram Venkataraman Closes #5949 from shivaram/sparkr-examples and squashes the following commits: 6c42fdc [Shivaram Venkataraman] Remove SparkR RDD examples, add dataframe examples --- R/pkg/NAMESPACE | 4 - R/pkg/R/DataFrame.R | 2 +- examples/src/main/r/dataframe.R | 54 +++++++++++ examples/src/main/r/kmeans.R | 93 ------------------- examples/src/main/r/linear_solver_mnist.R | 107 ---------------------- examples/src/main/r/logistic_regression.R | 62 ------------- examples/src/main/r/pi.R | 46 ---------- examples/src/main/r/wordcount.R | 42 --------- 8 files changed, 55 insertions(+), 355 deletions(-) create mode 100644 examples/src/main/r/dataframe.R delete mode 100644 examples/src/main/r/kmeans.R delete mode 100644 examples/src/main/r/linear_solver_mnist.R delete mode 100644 examples/src/main/r/logistic_regression.R delete mode 100644 examples/src/main/r/pi.R delete mode 100644 examples/src/main/r/wordcount.R diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 528e6608c3c82..3fb92be0940b7 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -45,8 +45,6 @@ exportMethods("cache", "showDF", "sortDF", "take", - "toJSON", - "toRDD", "unionAll", "unpersist", "where", @@ -95,14 +93,12 @@ export("cacheTable", "createExternalTable", "dropTempTable", "jsonFile", - "jsonRDD", "loadDF", "parquetFile", "sql", "table", "tableNames", "tables", - "toDF", "uncacheTable") export("sparkRSQL.init", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 56c305d912587..47d92f141cc7d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -272,7 +272,7 @@ setMethod("names", setMethod("registerTempTable", signature(x = "DataFrame", tableName = "character"), function(x, 
tableName) { - callJMethod(x@sdf, "registerTempTable", tableName) + invisible(callJMethod(x@sdf, "registerTempTable", tableName)) }) #' insertInto diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R new file mode 100644 index 0000000000000..53b817144f6ac --- /dev/null +++ b/examples/src/main/r/dataframe.R @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +library(SparkR) + +# Initialize SparkContext and SQLContext +sc <- sparkR.init(appName="SparkR-DataFrame-example") +sqlContext <- sparkRSQL.init(sc) + +# Create a simple local data.frame +localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18)) + +# Convert local data frame to a SparkR DataFrame +df <- createDataFrame(sqlContext, localDF) + +# Print its schema +printSchema(df) +# root +# |-- name: string (nullable = true) +# |-- age: double (nullable = true) + +# Create a DataFrame from a JSON file +path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json") +peopleDF <- jsonFile(sqlContext, path) +printSchema(peopleDF) + +# Register this DataFrame as a table. +registerTempTable(peopleDF, "people") + +# SQL statements can be run by using the sql methods provided by sqlContext +teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age <= 19") + +# Call collect to get a local data.frame +teenagersLocalDF <- collect(teenagers) + +# Print the teenagers in our dataset +print(teenagersLocalDF) + +# Stop the SparkContext now +sparkR.stop() diff --git a/examples/src/main/r/kmeans.R b/examples/src/main/r/kmeans.R deleted file mode 100644 index 6e6b5cb93789c..0000000000000 --- a/examples/src/main/r/kmeans.R +++ /dev/null @@ -1,93 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -library(SparkR) - -# Logistic regression in Spark. -# Note: unlike the example in Scala, a point here is represented as a vector of -# doubles. 
- -parseVectors <- function(lines) { - lines <- strsplit(as.character(lines) , " ", fixed = TRUE) - list(matrix(as.numeric(unlist(lines)), ncol = length(lines[[1]]))) -} - -dist.fun <- function(P, C) { - apply( - C, - 1, - function(x) { - colSums((t(P) - x)^2) - } - ) -} - -closestPoint <- function(P, C) { - max.col(-dist.fun(P, C)) -} -# Main program - -args <- commandArgs(trailing = TRUE) - -if (length(args) != 3) { - print("Usage: kmeans ") - q("no") -} - -sc <- sparkR.init(appName = "RKMeans") -K <- as.integer(args[[2]]) -convergeDist <- as.double(args[[3]]) - -lines <- textFile(sc, args[[1]]) -points <- cache(lapplyPartition(lines, parseVectors)) -# kPoints <- take(points, K) -kPoints <- do.call(rbind, takeSample(points, FALSE, K, 16189L)) -tempDist <- 1.0 - -while (tempDist > convergeDist) { - closest <- lapplyPartition( - lapply(points, - function(p) { - cp <- closestPoint(p, kPoints); - mapply(list, unique(cp), split.data.frame(cbind(1, p), cp), SIMPLIFY=FALSE) - }), - function(x) {do.call(c, x) - }) - - pointStats <- reduceByKey(closest, - function(p1, p2) { - t(colSums(rbind(p1, p2))) - }, - 2L) - - newPoints <- do.call( - rbind, - collect(lapply(pointStats, - function(tup) { - point.sum <- tup[[2]][, -1] - point.count <- tup[[2]][, 1] - point.sum/point.count - }))) - - D <- dist.fun(kPoints, newPoints) - tempDist <- sum(D[cbind(1:3, max.col(-D))]) - kPoints <- newPoints - cat("Finished iteration (delta = ", tempDist, ")\n") -} - -cat("Final centers:\n") -writeLines(unlist(lapply(kPoints, paste, collapse = " "))) diff --git a/examples/src/main/r/linear_solver_mnist.R b/examples/src/main/r/linear_solver_mnist.R deleted file mode 100644 index c864a4232d010..0000000000000 --- a/examples/src/main/r/linear_solver_mnist.R +++ /dev/null @@ -1,107 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# Instructions: https://github.com/amplab-extras/SparkR-pkg/wiki/SparkR-Example:-Digit-Recognition-on-EC2 - -library(SparkR) -library(Matrix) - -args <- commandArgs(trailing = TRUE) - -# number of random features; default to 1100 -D <- ifelse(length(args) > 0, as.integer(args[[1]]), 1100) -# number of partitions for training dataset -trainParts <- 12 -# dimension of digits -d <- 784 -# number of test examples -NTrain <- 60000 -# number of training examples -NTest <- 10000 -# scale of features -gamma <- 4e-4 - -sc <- sparkR.init(appName = "SparkR-LinearSolver") - -# You can also use HDFS path to speed things up: -# hdfs:///train-mnist-dense-with-labels.data -file <- textFile(sc, "/data/train-mnist-dense-with-labels.data", trainParts) - -W <- gamma * matrix(nrow=D, ncol=d, data=rnorm(D*d)) -b <- 2 * pi * matrix(nrow=D, ncol=1, data=runif(D)) -broadcastW <- broadcast(sc, W) -broadcastB <- broadcast(sc, b) - -includePackage(sc, Matrix) -numericLines <- lapplyPartitionsWithIndex(file, - function(split, part) { - matList <- sapply(part, function(line) { - as.numeric(strsplit(line, ",", fixed=TRUE)[[1]]) - }, simplify=FALSE) - mat <- Matrix(ncol=d+1, data=unlist(matList, F, F), - sparse=T, byrow=T) - mat - }) - -featureLabels <- cache(lapplyPartition( - numericLines, - function(part) { - label <- part[,1] - mat <- part[,-1] - ones <- rep(1, nrow(mat)) - features <- cos( - mat %*% t(value(broadcastW)) + (matrix(ncol=1, data=ones) %*% t(value(broadcastB)))) - onesMat <- Matrix(ones) - featuresPlus <- cBind(features, onesMat) - labels <- matrix(nrow=nrow(mat), ncol=10, data=-1) - for (i in 1:nrow(mat)) { - labels[i, label[i]] <- 1 - } - list(label=labels, features=featuresPlus) - })) - -FTF <- Reduce("+", collect(lapplyPartition(featureLabels, - function(part) { - t(part$features) %*% part$features - }), flatten=F)) - -FTY <- Reduce("+", collect(lapplyPartition(featureLabels, - function(part) { - t(part$features) %*% part$label - }), flatten=F)) - -# solve for the coefficient matrix -C <- solve(FTF, FTY) - -test <- Matrix(as.matrix(read.csv("/data/test-mnist-dense-with-labels.data", - header=F), sparse=T)) -testData <- test[,-1] -testLabels <- matrix(ncol=1, test[,1]) - -err <- 0 - -# contstruct the feature maps for all examples from this digit -featuresTest <- cos(testData %*% t(value(broadcastW)) + - (matrix(ncol=1, data=rep(1, NTest)) %*% t(value(broadcastB)))) -featuresTest <- cBind(featuresTest, Matrix(rep(1, NTest))) - -# extract the one vs. all assignment -results <- featuresTest %*% C -labelsGot <- apply(results, 1, which.max) -err <- sum(testLabels != labelsGot) / nrow(testLabels) - -cat("\nFinished running. The error rate is: ", err, ".\n") diff --git a/examples/src/main/r/logistic_regression.R b/examples/src/main/r/logistic_regression.R deleted file mode 100644 index 2a86aa98160d3..0000000000000 --- a/examples/src/main/r/logistic_regression.R +++ /dev/null @@ -1,62 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -library(SparkR) - -args <- commandArgs(trailing = TRUE) - -if (length(args) != 3) { - print("Usage: logistic_regression ") - q("no") -} - -# Initialize Spark context -sc <- sparkR.init(appName = "LogisticRegressionR") -iterations <- as.integer(args[[2]]) -D <- as.integer(args[[3]]) - -readPartition <- function(part){ - part = strsplit(part, " ", fixed = T) - list(matrix(as.numeric(unlist(part)), ncol = length(part[[1]]))) -} - -# Read data points and convert each partition to a matrix -points <- cache(lapplyPartition(textFile(sc, args[[1]]), readPartition)) - -# Initialize w to a random value -w <- runif(n=D, min = -1, max = 1) -cat("Initial w: ", w, "\n") - -# Compute logistic regression gradient for a matrix of data points -gradient <- function(partition) { - partition = partition[[1]] - Y <- partition[, 1] # point labels (first column of input file) - X <- partition[, -1] # point coordinates - - # For each point (x, y), compute gradient function - dot <- X %*% w - logit <- 1 / (1 + exp(-Y * dot)) - grad <- t(X) %*% ((logit - 1) * Y) - list(grad) -} - -for (i in 1:iterations) { - cat("On iteration ", i, "\n") - w <- w - reduce(lapplyPartition(points, gradient), "+") -} - -cat("Final w: ", w, "\n") diff --git a/examples/src/main/r/pi.R b/examples/src/main/r/pi.R deleted file mode 100644 index aa7a833e147a0..0000000000000 --- a/examples/src/main/r/pi.R +++ /dev/null @@ -1,46 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/examples/src/main/r/pi.R b/examples/src/main/r/pi.R
deleted file mode 100644
index aa7a833e147a0..0000000000000
--- a/examples/src/main/r/pi.R
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-library(SparkR)
-
-args <- commandArgs(trailing = TRUE)
-
-sc <- sparkR.init(appName = "PiR")
-
-slices <- ifelse(length(args) > 1, as.integer(args[[2]]), 2)
-
-n <- 100000 * slices
-
-piFunc <- function(elem) {
-  rands <- runif(n = 2, min = -1, max = 1)
-  val <- ifelse((rands[1]^2 + rands[2]^2) < 1, 1.0, 0.0)
-  val
-}
-
-
-piFuncVec <- function(elems) {
-  message(length(elems))
-  rands1 <- runif(n = length(elems), min = -1, max = 1)
-  rands2 <- runif(n = length(elems), min = -1, max = 1)
-  val <- ifelse((rands1^2 + rands2^2) < 1, 1.0, 0.0)
-  sum(val)
-}
-
-rdd <- parallelize(sc, 1:n, slices)
-count <- reduce(lapplyPartition(rdd, piFuncVec), sum)
-cat("Pi is roughly", 4.0 * count / n, "\n")
-cat("Num elements in RDD ", count(rdd), "\n")
diff --git a/examples/src/main/r/wordcount.R b/examples/src/main/r/wordcount.R
deleted file mode 100644
index b734cb0ecf55b..0000000000000
--- a/examples/src/main/r/wordcount.R
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-library(SparkR)
-
-args <- commandArgs(trailing = TRUE)
-
-if (length(args) != 1) {
-  print("Usage: wordcount <file>")
-  q("no")
-}
-
-# Initialize Spark context
-sc <- sparkR.init(appName = "RwordCount")
-lines <- textFile(sc, args[[1]])
-
-words <- flatMap(lines,
-                 function(line) {
-                   strsplit(line, " ")[[1]]
-                 })
-wordCount <- lapply(words, function(word) { list(word, 1L) })
-
-counts <- reduceByKey(wordCount, "+", 2L)
-output <- collect(counts)
-
-for (wordcount in output) {
-  cat(wordcount[[1]], ": ", wordcount[[2]], "\n")
-}

From 316a5c0423ba3688cacd3acc3c5b5571e8a71d1d Mon Sep 17 00:00:00 2001
From: jerryshao
Date: Wed, 6 May 2015 17:44:43 -0700
Subject: [PATCH 05/48] [SPARK-7396] [STREAMING] [EXAMPLE] Update KafkaWordCountProducer to use new Producer API

Otherwise it will throw an exception:

```
Exception in thread "main" kafka.common.FailedToSendMessageException: Failed to send messages after 3 tries.
    at kafka.producer.async.DefaultEventHandler.handle(DefaultEventHandler.scala:90)
    at kafka.producer.Producer.send(Producer.scala:77)
    at org.apache.spark.examples.streaming.KafkaWordCountProducer$.main(KafkaWordCount.scala:96)
    at org.apache.spark.examples.streaming.KafkaWordCountProducer.main(KafkaWordCount.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:623)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:169)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:192)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:111)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
```

Author: jerryshao

Closes #5936 from jerryshao/SPARK-7396 and squashes the following commits:

270bbe2 [jerryshao] Fix Kafka Produce throw Exception issue
---
 .../examples/streaming/KafkaWordCount.scala   | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala b/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala
index 387c0e421334b..f407367a54f6c 100644
--- a/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala
+++ b/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala
@@ -17,9 +17,9 @@
 
 package org.apache.spark.examples.streaming
 
-import java.util.Properties
+import java.util.HashMap
 
-import kafka.producer._
+import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}
 
 import org.apache.spark.streaming._
 import org.apache.spark.streaming.kafka._
@@ -77,23 +77,25 @@ object KafkaWordCountProducer {
 
     val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args
 
     // Zookeeper connection properties
-    val props = new Properties()
-    props.put("metadata.broker.list", brokers)
-    props.put("serializer.class", "kafka.serializer.StringEncoder")
+    val props = new HashMap[String, Object]()
+    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
+    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
+      "org.apache.kafka.common.serialization.StringSerializer")
+    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
+      "org.apache.kafka.common.serialization.StringSerializer")
 
-    val config = new ProducerConfig(props)
-    val producer = new Producer[String, String](config)
+    val producer = new KafkaProducer[String, String](props)
 
     // Send some messages
     while(true) {
-      val messages = (1 to messagesPerSec.toInt).map { messageNum =>
+      (1 to messagesPerSec.toInt).foreach { messageNum =>
         val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
           .mkString(" ")
 
-        new KeyedMessage[String, String](topic, str)
-      }.toArray
+        val message = new ProducerRecord[String, String](topic, null, str)
+        producer.send(message)
+      }
 
-      producer.send(messages: _*)
       Thread.sleep(100)
   }
 }
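For readers coming from the old `kafka.producer.Producer`, the new-producer pattern this patch adopts can be summarized in a minimal standalone sketch; the broker address `localhost:9092`, topic `test`, and object name `NewProducerSketch` below are placeholder values, not taken from the patch:

```scala
import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

object NewProducerSketch {
  def main(args: Array[String]): Unit = {
    // Configure the new producer; key/value serializers are mandatory settings.
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    // send() is asynchronous and returns a java.util.concurrent.Future;
    // a null key lets the default partitioner spread records across partitions.
    producer.send(new ProducerRecord[String, String]("test", null, "hello"))
    producer.close()
  }
}
```

From 8fa6829f5e6d8b8f33097c3f20fcb96198d4031b Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Wed, 6 May 2015 17:52:34 -0700
Subject: [PATCH 06/48] [SPARK-7371] [SPARK-7377] [SPARK-7408] DAG visualization addendum (#5729)

This is a follow-up patch for #5729.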
**[SPARK-7408]** Move as much style code from JS to CSS as possible **[SPARK-7377]** Fix JS error if a job / stage contains only one RDD **[SPARK-7371]** Decrease emphasis on RDD on stage page as requested by mateiz pwendell This patch also includes general code clean up. Author: Andrew Or Closes #5954 from andrewor14/viz-emphasize-rdd and squashes the following commits: 3c0d4f0 [Andrew Or] Guard against JS error by rendering arrows only if needed f23e15b [Andrew Or] Merge branch 'master' of github.com:apache/spark into viz-emphasize-rdd 565801f [Andrew Or] Clean up code 9dab5f0 [Andrew Or] Move styling from JS to CSS + clean up code 107c0b6 [Andrew Or] Tweak background color, stroke width, font size etc. 1610c62 [Andrew Or] Implement cluster padding for stage page --- .../apache/spark/ui/static/dagre-d3.min.js | 14 +- .../apache/spark/ui/static/spark-dag-viz.css | 83 ++++ .../apache/spark/ui/static/spark-dag-viz.js | 357 ++++++++++-------- .../scala/org/apache/spark/ui/UIUtils.scala | 14 +- .../spark/ui/scope/RDDOperationGraph.scala | 19 +- 5 files changed, 310 insertions(+), 177 deletions(-) create mode 100644 core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css diff --git a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js index 6d2da25024a83..6beb7db855ca0 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js +++ b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js @@ -1,4 +1,4 @@ -/*v0.4.3 with 1 additional commit (see http://github.com/andrewor14/dagre-d3)*/(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.dagreD3=f()}})(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distpMath.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof 
label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;i0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" 
updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){parentDummyChains(g)});time(" addBorderSegments",function(){addBorderSegments(g)});time(" order",function(){order(g)});time(" insertSelfEdges",function(){insertSelfEdges(g)});time(" adjustCoordinateSystem",function(){coordinateSystem.adjust(g)});time(" position",function(){position(g)});time(" positionSelfEdges",function(){positionSelfEdges(g)});time(" removeBorderNodes",function(){removeBorderNodes(g)});time(" normalize.undo",function(){normalize.undo(g)});time(" fixupEdgeLabelCoords",function(){fixupEdgeLabelCoords(g)});time(" undoCoordinateSystem",function(){coordinateSystem.undo(g)});time(" translateGraph",function(){translateGraph(g)});time(" assignNodeIntersects",function(){assignNodeIntersects(g)});time(" reversePoints",function(){reversePointsForReversedEdges(g)});time(" acyclic.undo",function(){acyclic.undo(g)})}function updateInputGraph(inputGraph,layoutGraph){_.each(inputGraph.nodes(),function(v){var inputLabel=inputGraph.node(v),layoutLabel=layoutGraph.node(v);if(inputLabel){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y;if(layoutGraph.children(v).length){inputLabel.width=layoutLabel.width;inputLabel.height=layoutLabel.height}}});_.each(inputGraph.edges(),function(e){var inputLabel=inputGraph.edge(e),layoutLabel=layoutGraph.edge(e);inputLabel.points=layoutLabel.points;if(_.has(layoutLabel,"x")){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y}});inputGraph.graph().width=layoutGraph.graph().width;inputGraph.graph().height=layoutGraph.graph().height}var graphNumAttrs=["nodesep","edgesep","ranksep","marginx","marginy"],graphDefaults={ranksep:50,edgesep:20,nodesep:50,rankdir:"tb"},graphAttrs=["acyclicer","ranker","rankdir","align"],nodeNumAttrs=["width","height"],nodeDefaults={width:0,height:0},edgeNumAttrs=["minlen","weight","width","height","labeloffset"],edgeDefaults={minlen:1,weight:1,width:0,height:0,labeloffset:10,labelpos:"r"},edgeAttrs=["labelpos"];function buildLayoutGraph(inputGraph){var g=new Graph({multigraph:true,compound:true}),graph=canonicalize(inputGraph.graph());g.setGraph(_.merge({},graphDefaults,selectNumberAttrs(graph,graphNumAttrs),_.pick(graph,graphAttrs)));_.each(inputGraph.nodes(),function(v){var node=canonicalize(inputGraph.node(v));g.setNode(v,_.defaults(selectNumberAttrs(node,nodeNumAttrs),nodeDefaults));g.setParent(v,inputGraph.parent(v))});_.each(inputGraph.edges(),function(e){var edge=canonicalize(inputGraph.edge(e));g.setEdge(e,_.merge({},edgeDefaults,selectNumberAttrs(edge,edgeNumAttrs),_.pick(edge,edgeAttrs)))});return g}function makeSpaceForEdgeLabels(g){var graph=g.graph(); 
+module.exports={graphlib:require("./lib/graphlib"),dagre:require("./lib/dagre"),intersect:require("./lib/intersect"),render:require("./lib/render"),util:require("./lib/util"),version:require("./lib/version")}},{"./lib/dagre":8,"./lib/graphlib":9,"./lib/intersect":10,"./lib/render":23,"./lib/util":25,"./lib/version":26}],2:[function(require,module,exports){var util=require("./util");module.exports={"default":normal,normal:normal,vee:vee,undirected:undirected};function normal(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function vee(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 L 4 5 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function undirected(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 5 L 10 5").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}},{"./util":25}],3:[function(require,module,exports){var util=require("./util");module.exports=createClusters;function createClusters(selection,g){var clusters=g.nodes().filter(function(v){return util.isSubgraph(g,v)}),svgClusters=selection.selectAll("g.cluster").data(clusters,function(v){return v});svgClusters.enter().append("g").attr("id",function(v){return"cluster_"+v.replace(/^cluster/,"")}).attr("name",function(v){return g.node(v).label}).attr("class","cluster").style("opacity",0).append("rect");util.applyTransition(svgClusters.exit(),g).style("opacity",0).remove();util.applyTransition(svgClusters,g).style("opacity",1);util.applyTransition(svgClusters.selectAll("rect"),g).attr("width",function(v){var node=g.node(v);return node.width+node.paddingLeft+node.paddingRight}).attr("height",function(v){var node=g.node(v);return node.height+node.paddingTop+node.paddingBottom}).attr("x",function(v){var node=g.node(v);return node.x-node.width/2-node.paddingLeft}).attr("y",function(v){var node=g.node(v);return node.y-node.height/2-node.paddingTop})}},{"./util":25}],4:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createEdgeLabels;function createEdgeLabels(selection,g){var svgEdgeLabels=selection.selectAll("g.edgeLabel").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);svgEdgeLabels.selectAll("*").remove();svgEdgeLabels.enter().append("g").classed("edgeLabel",true).style("opacity",0);svgEdgeLabels.each(function(e){var 
edge=g.edge(e),label=addLabel(d3.select(this),g.edge(e),0,0).classed("label",true),bbox=label.node().getBBox();if(edge.labelId){label.attr("id",edge.labelId)}if(!_.has(edge,"width")){edge.width=bbox.width}if(!_.has(edge,"height")){edge.height=bbox.height}});util.applyTransition(svgEdgeLabels.exit(),g).style("opacity",0).remove();return svgEdgeLabels}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],5:[function(require,module,exports){"use strict";var _=require("./lodash"),intersectNode=require("./intersect/intersect-node"),util=require("./util"),d3=require("./d3");module.exports=createEdgePaths;function createEdgePaths(selection,g,arrows){var svgPaths=selection.selectAll("g.edgePath").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);enter(svgPaths,g);exit(svgPaths,g);util.applyTransition(svgPaths,g).style("opacity",1);svgPaths.each(function(e){var domEdge=d3.select(this);var edge=g.edge(e);edge.elem=this;if(edge.id){domEdge.attr("id",edge.id)}util.applyClass(domEdge,edge["class"],(domEdge.classed("update")?"update ":"")+"edgePath")});svgPaths.selectAll("path.path").each(function(e){var edge=g.edge(e);edge.arrowheadId=_.uniqueId("arrowhead");var domEdge=d3.select(this).attr("marker-end",function(){return"url(#"+edge.arrowheadId+")"}).style("fill","none");util.applyTransition(domEdge,g).attr("d",function(e){return calcPoints(g,e)});util.applyStyle(domEdge,edge.style)});svgPaths.selectAll("defs *").remove();svgPaths.selectAll("defs").each(function(e){var edge=g.edge(e),arrowhead=arrows[edge.arrowhead];arrowhead(d3.select(this),edge.arrowheadId,edge,"arrowhead")});return svgPaths}function calcPoints(g,e){var edge=g.edge(e),tail=g.node(e.v),head=g.node(e.w),points=edge.points.slice(1,edge.points.length-1);points.unshift(intersectNode(tail,points[0]));points.push(intersectNode(head,points[points.length-1]));return createLine(edge,points)}function createLine(edge,points){var line=d3.svg.line().x(function(d){return d.x}).y(function(d){return d.y});if(_.has(edge,"lineInterpolate")){line.interpolate(edge.lineInterpolate)}if(_.has(edge,"lineTension")){line.tension(Number(edge.lineTension))}return line(points)}function getCoords(elem){var bbox=elem.getBBox(),matrix=elem.getTransformToElement(elem.ownerSVGElement).translate(bbox.width/2,bbox.height/2);return{x:matrix.e,y:matrix.f}}function enter(svgPaths,g){var svgPathsEnter=svgPaths.enter().append("g").attr("class","edgePath").style("opacity",0);svgPathsEnter.append("path").attr("class","path").attr("d",function(e){var edge=g.edge(e),sourceElem=g.node(e.v).elem,points=_.range(edge.points.length).map(function(){return getCoords(sourceElem)});return createLine(edge,points)});svgPathsEnter.append("defs")}function exit(svgPaths,g){var svgPathExit=svgPaths.exit();util.applyTransition(svgPathExit,g).style("opacity",0).remove();util.applyTransition(svgPathExit.select("path.path"),g).attr("d",function(e){var source=g.node(e.v);if(source){var points=_.range(this.pathSegList.length).map(function(){return source});return createLine({},points)}else{return d3.select(this).attr("d")}})}},{"./d3":7,"./intersect/intersect-node":14,"./lodash":20,"./util":25}],6:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createNodes;function createNodes(selection,g,shapes){var simpleNodes=g.nodes().filter(function(v){return!util.isSubgraph(g,v)});var svgNodes=selection.selectAll("g.node").data(simpleNodes,function(v){return 
v}).classed("update",true);svgNodes.selectAll("*").remove();svgNodes.enter().append("g").attr("id",function(v){return"node_"+v}).attr("name",function(v){return g.node(v).label}).attr("class","node").style("opacity",0);svgNodes.each(function(v){var node=g.node(v),thisGroup=d3.select(this),labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),shape=shapes[node.shape],bbox=_.pick(labelDom.node().getBBox(),"width","height");node.elem=this;if(node.id){thisGroup.attr("id",node.id)}if(node.labelId){labelGroup.attr("id",node.labelId)}util.applyClass(thisGroup,node["class"],(thisGroup.classed("update")?"update ":"")+"node");if(_.has(node,"width")){bbox.width=node.width}if(_.has(node,"height")){bbox.height=node.height}bbox.width+=node.paddingLeft+node.paddingRight;bbox.height+=node.paddingTop+node.paddingBottom;labelGroup.attr("transform","translate("+(node.paddingLeft-node.paddingRight)/2+","+(node.paddingTop-node.paddingBottom)/2+")");var shapeSvg=shape(d3.select(this),bbox,node);util.applyStyle(shapeSvg,node.style);var shapeBBox=shapeSvg.node().getBBox();node.width=shapeBBox.width;node.height=shapeBBox.height});util.applyTransition(svgNodes.exit(),g).style("opacity",0).remove();return svgNodes}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],7:[function(require,module,exports){module.exports=window.d3},{}],8:[function(require,module,exports){var dagre;if(require){try{dagre=require("dagre")}catch(e){}}if(!dagre){dagre=window.dagre}module.exports=dagre},{dagre:27}],9:[function(require,module,exports){var graphlib;if(require){try{graphlib=require("graphlib")}catch(e){}}if(!graphlib){graphlib=window.graphlib}module.exports=graphlib},{graphlib:57}],10:[function(require,module,exports){module.exports={node:require("./intersect-node"),circle:require("./intersect-circle"),ellipse:require("./intersect-ellipse"),polygon:require("./intersect-polygon"),rect:require("./intersect-rect")}},{"./intersect-circle":11,"./intersect-ellipse":12,"./intersect-node":14,"./intersect-polygon":15,"./intersect-rect":16}],11:[function(require,module,exports){var intersectEllipse=require("./intersect-ellipse");module.exports=intersectCircle;function intersectCircle(node,rx,point){return intersectEllipse(node,rx,rx,point)}},{"./intersect-ellipse":12}],12:[function(require,module,exports){module.exports=intersectEllipse;function intersectEllipse(node,rx,ry,point){var cx=node.x;var cy=node.y;var px=cx-point.x;var py=cy-point.y;var det=Math.sqrt(rx*rx*py*py+ry*ry*px*px);var dx=Math.abs(rx*ry*px/det);if(point.x0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distpMath.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var 
util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;i0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var 
time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){parentDummyChains(g)});time(" addBorderSegments",function(){addBorderSegments(g)});time(" order",function(){order(g)});time(" insertSelfEdges",function(){insertSelfEdges(g)});time(" adjustCoordinateSystem",function(){coordinateSystem.adjust(g)});time(" position",function(){position(g)});time(" positionSelfEdges",function(){positionSelfEdges(g)});time(" removeBorderNodes",function(){removeBorderNodes(g)});time(" normalize.undo",function(){normalize.undo(g)});time(" fixupEdgeLabelCoords",function(){fixupEdgeLabelCoords(g)});time(" undoCoordinateSystem",function(){coordinateSystem.undo(g)});time(" translateGraph",function(){translateGraph(g)});time(" assignNodeIntersects",function(){assignNodeIntersects(g)});time(" reversePoints",function(){reversePointsForReversedEdges(g)});time(" acyclic.undo",function(){acyclic.undo(g)})}function updateInputGraph(inputGraph,layoutGraph){_.each(inputGraph.nodes(),function(v){var inputLabel=inputGraph.node(v),layoutLabel=layoutGraph.node(v);if(inputLabel){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y;if(layoutGraph.children(v).length){inputLabel.width=layoutLabel.width;inputLabel.height=layoutLabel.height}}});_.each(inputGraph.edges(),function(e){var inputLabel=inputGraph.edge(e),layoutLabel=layoutGraph.edge(e);inputLabel.points=layoutLabel.points;if(_.has(layoutLabel,"x")){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y}});inputGraph.graph().width=layoutGraph.graph().width;inputGraph.graph().height=layoutGraph.graph().height}var graphNumAttrs=["nodesep","edgesep","ranksep","marginx","marginy"],graphDefaults={ranksep:50,edgesep:20,nodesep:50,rankdir:"tb"},graphAttrs=["acyclicer","ranker","rankdir","align"],nodeNumAttrs=["width","height"],nodeDefaults={width:0,height:0},edgeNumAttrs=["minlen","weight","width","height","labeloffset"],edgeDefaults={minlen:1,weight:1,width:0,height:0,labeloffset:10,labelpos:"r"},edgeAttrs=["labelpos"];function buildLayoutGraph(inputGraph){var g=new Graph({multigraph:true,compound:true}),graph=canonicalize(inputGraph.graph());g.setGraph(_.merge({},graphDefaults,selectNumberAttrs(graph,graphNumAttrs),_.pick(graph,graphAttrs))); -graph.ranksep/=2;_.each(g.edges(),function(e){var edge=g.edge(e);edge.minlen*=2;if(edge.labelpos.toLowerCase()!=="c"){if(graph.rankdir==="TB"||graph.rankdir==="BT"){edge.width+=edge.labeloffset}else{edge.height+=edge.labeloffset}}})}function injectEdgeLabelProxies(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.width&&edge.height){var 
v=g.node(e.v),w=g.node(e.w),label={rank:(w.rank-v.rank)/2+v.rank,e:e};util.addDummyNode(g,"edge-proxy",label,"_ep")}})}function assignRankMinMax(g){var maxRank=0;_.each(g.nodes(),function(v){var node=g.node(v);if(node.borderTop){node.minRank=g.node(node.borderTop).rank;node.maxRank=g.node(node.borderBottom).rank;maxRank=_.max(maxRank,node.maxRank)}});g.graph().maxRank=maxRank}function removeEdgeLabelProxies(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="edge-proxy"){g.edge(node.e).labelRank=node.rank;g.removeNode(v)}})}function translateGraph(g){var minX=Number.POSITIVE_INFINITY,maxX=0,minY=Number.POSITIVE_INFINITY,maxY=0,graphLabel=g.graph(),marginX=graphLabel.marginx||0,marginY=graphLabel.marginy||0;function getExtremes(attrs){var x=attrs.x,y=attrs.y,w=attrs.width,h=attrs.height;minX=Math.min(minX,x-w/2);maxX=Math.max(maxX,x+w/2);minY=Math.min(minY,y-h/2);maxY=Math.max(maxY,y+h/2)}_.each(g.nodes(),function(v){getExtremes(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){getExtremes(edge)}});minX-=marginX;minY-=marginY;_.each(g.nodes(),function(v){var node=g.node(v);node.x-=minX;node.y-=minY});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,function(p){p.x-=minX;p.y-=minY});if(_.has(edge,"x")){edge.x-=minX}if(_.has(edge,"y")){edge.y-=minY}});graphLabel.width=maxX-minX+marginX;graphLabel.height=maxY-minY+marginY}function assignNodeIntersects(g){_.each(g.edges(),function(e){var edge=g.edge(e),nodeV=g.node(e.v),nodeW=g.node(e.w),p1,p2;if(!edge.points){edge.points=[];p1=nodeW;p2=nodeV}else{p1=edge.points[0];p2=edge.points[edge.points.length-1]}edge.points.unshift(util.intersectRect(nodeV,p1));edge.points.push(util.intersectRect(nodeW,p2))})}function fixupEdgeLabelCoords(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){if(edge.labelpos==="l"||edge.labelpos==="r"){edge.width-=edge.labeloffset}switch(edge.labelpos){case"l":edge.x-=edge.width/2+edge.labeloffset;break;case"r":edge.x+=edge.width/2+edge.labeloffset;break}}})}function reversePointsForReversedEdges(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.reversed){edge.points.reverse()}})}function removeBorderNodes(g){_.each(g.nodes(),function(v){if(g.children(v).length){var node=g.node(v),t=g.node(node.borderTop),b=g.node(node.borderBottom),l=g.node(_.last(node.borderLeft)),r=g.node(_.last(node.borderRight));node.width=Math.abs(r.x-l.x);node.height=Math.abs(b.y-t.y);node.x=l.x+node.width/2;node.y=t.y+node.height/2}});_.each(g.nodes(),function(v){if(g.node(v).dummy==="border"){g.removeNode(v)}})}function removeSelfEdges(g){_.each(g.edges(),function(e){if(e.v===e.w){var node=g.node(e.v);if(!node.selfEdges){node.selfEdges=[]}node.selfEdges.push({e:e,label:g.edge(e)});g.removeEdge(e)}})}function insertSelfEdges(g){var layers=util.buildLayerMatrix(g);_.each(layers,function(layer){var orderShift=0;_.each(layer,function(v,i){var node=g.node(v);node.order=i+orderShift;_.each(node.selfEdges,function(selfEdge){util.addDummyNode(g,"selfedge",{width:selfEdge.label.width,height:selfEdge.label.height,rank:node.rank,order:i+ ++orderShift,e:selfEdge.e,label:selfEdge.label},"_se")});delete node.selfEdges})})}function positionSelfEdges(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="selfedge"){var 
selfNode=g.node(node.e.v),x=selfNode.x+selfNode.width/2,y=selfNode.y,dx=node.x-x,dy=selfNode.height/2;g.setEdge(node.e,node.label);g.removeNode(v);node.label.points=[{x:x+2*dx/3,y:y-dy},{x:x+5*dx/6,y:y-dy},{x:x+dx,y:y},{x:x+5*dx/6,y:y+dy},{x:x+2*dx/3,y:y+dy}];node.label.x=node.x;node.label.y=node.y}})}function selectNumberAttrs(obj,attrs){return _.mapValues(_.pick(obj,attrs),Number)}function canonicalize(attrs){var newAttrs={};_.each(attrs,function(v,k){newAttrs[k.toLowerCase()]=v});return newAttrs}},{"./acyclic":28,"./add-border-segments":29,"./coordinate-system":30,"./graphlib":33,"./lodash":36,"./nesting-graph":37,"./normalize":38,"./order":43,"./parent-dummy-chains":48,"./position":50,"./rank":52,"./util":55}],36:[function(require,module,exports){arguments[4][20][0].apply(exports,arguments)},{dup:20,lodash:77}],37:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports={run:run,cleanup:cleanup};function run(g){var root=util.addDummyNode(g,"root",{},"_root"),depths=treeDepths(g),height=_.max(depths)-1,nodeSep=2*height+1;g.graph().nestingRoot=root;_.each(g.edges(),function(e){g.edge(e).minlen*=nodeSep});var weight=sumWeights(g)+1;_.each(g.children(),function(child){dfs(g,root,nodeSep,weight,height,depths,child)});g.graph().nodeRankFactor=nodeSep}function dfs(g,root,nodeSep,weight,height,depths,v){var children=g.children(v);if(!children.length){if(v!==root){g.setEdge(root,v,{weight:0,minlen:nodeSep})}return}var top=util.addBorderNode(g,"_bt"),bottom=util.addBorderNode(g,"_bb"),label=g.node(v);g.setParent(top,v);label.borderTop=top;g.setParent(bottom,v);label.borderBottom=bottom;_.each(children,function(child){dfs(g,root,nodeSep,weight,height,depths,child);var childNode=g.node(child),childTop=childNode.borderTop?childNode.borderTop:child,childBottom=childNode.borderBottom?childNode.borderBottom:child,thisWeight=childNode.borderTop?weight:2*weight,minlen=childTop!==childBottom?1:height-depths[v]+1;g.setEdge(top,childTop,{weight:thisWeight,minlen:minlen,nestingEdge:true});g.setEdge(childBottom,bottom,{weight:thisWeight,minlen:minlen,nestingEdge:true})});if(!g.parent(v)){g.setEdge(root,top,{weight:0,minlen:height+depths[v]})}}function treeDepths(g){var depths={};function dfs(v,depth){var children=g.children(v);if(children&&children.length){_.each(children,function(child){dfs(child,depth+1)})}depths[v]=depth}_.each(g.children(),function(v){dfs(v,1)});return depths}function sumWeights(g){return _.reduce(g.edges(),function(acc,e){return acc+g.edge(e).weight},0)}function cleanup(g){var graphLabel=g.graph();g.removeNode(graphLabel.nestingRoot);delete graphLabel.nestingRoot;_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.nestingEdge){g.removeEdge(e)}})}},{"./lodash":36,"./util":55}],38:[function(require,module,exports){"use strict";var _=require("./lodash"),util=require("./util");module.exports={run:run,undo:undo};function run(g){g.graph().dummyChains=[];_.each(g.edges(),function(edge){normalizeEdge(g,edge)})}function normalizeEdge(g,e){var v=e.v,vRank=g.node(v).rank,w=e.w,wRank=g.node(w).rank,name=e.name,edgeLabel=g.edge(e),labelRank=edgeLabel.labelRank;if(wRank===vRank+1)return;g.removeEdge(e);var dummy,attrs,i;for(i=0,++vRank;vRank0){if(index%2){weightSum+=tree[index+1]}index=index-1>>1;tree[index]+=entry.weight}cc+=entry.weight*weightSum}));return cc}},{"../lodash":36}],43:[function(require,module,exports){"use strict";var 
_=require("../lodash"),initOrder=require("./init-order"),crossCount=require("./cross-count"),sortSubgraph=require("./sort-subgraph"),buildLayerGraph=require("./build-layer-graph"),addSubgraphConstraints=require("./add-subgraph-constraints"),Graph=require("../graphlib").Graph,util=require("../util");module.exports=order;function order(g){var maxRank=util.maxRank(g),downLayerGraphs=buildLayerGraphs(g,_.range(1,maxRank+1),"inEdges"),upLayerGraphs=buildLayerGraphs(g,_.range(maxRank-1,-1,-1),"outEdges");var layering=initOrder(g);assignOrder(g,layering);var bestCC=Number.POSITIVE_INFINITY,best;for(var i=0,lastBest=0;lastBest<4;++i,++lastBest){sweepLayerGraphs(i%2?downLayerGraphs:upLayerGraphs,i%4>=2);layering=util.buildLayerMatrix(g);var cc=crossCount(g,layering);if(cc=vEntry.barycenter){mergeEntries(vEntry,uEntry)}}}function handleOut(vEntry){return function(wEntry){wEntry["in"].push(vEntry);if(--wEntry.indegree===0){sourceSet.push(wEntry)}}}while(sourceSet.length){var entry=sourceSet.pop();entries.push(entry);_.each(entry["in"].reverse(),handleIn(entry));_.each(entry.out,handleOut(entry))}return _.chain(entries).filter(function(entry){return!entry.merged}).map(function(entry){return _.pick(entry,["vs","i","barycenter","weight"])}).value()}function mergeEntries(target,source){var sum=0,weight=0;if(target.weight){sum+=target.barycenter*target.weight;weight+=target.weight}if(source.weight){sum+=source.barycenter*source.weight;weight+=source.weight}target.vs=source.vs.concat(target.vs);target.barycenter=sum/weight;target.weight=weight;target.i=Math.min(source.i,target.i);source.merged=true}},{"../lodash":36}],46:[function(require,module,exports){var _=require("../lodash"),barycenter=require("./barycenter"),resolveConflicts=require("./resolve-conflicts"),sort=require("./sort");module.exports=sortSubgraph;function sortSubgraph(g,v,cg,biasRight){var movable=g.children(v),node=g.node(v),bl=node?node.borderLeft:undefined,br=node?node.borderRight:undefined,subgraphs={};if(bl){movable=_.filter(movable,function(w){return w!==bl&&w!==br})}var barycenters=barycenter(g,movable);_.each(barycenters,function(entry){if(g.children(entry.v).length){var subgraphResult=sortSubgraph(g,entry.v,cg,biasRight);subgraphs[entry.v]=subgraphResult;if(_.has(subgraphResult,"barycenter")){mergeBarycenters(entry,subgraphResult)}}});var entries=resolveConflicts(barycenters,cg);expandSubgraphs(entries,subgraphs);var result=sort(entries,biasRight);if(bl){result.vs=_.flatten([bl,result.vs,br],true);if(g.predecessors(bl).length){var blPred=g.node(g.predecessors(bl)[0]),brPred=g.node(g.predecessors(br)[0]);if(!_.has(result,"barycenter")){result.barycenter=0;result.weight=0}result.barycenter=(result.barycenter*result.weight+blPred.order+brPred.order)/(result.weight+2);result.weight+=2}}return result}function expandSubgraphs(entries,subgraphs){_.each(entries,function(entry){entry.vs=_.flatten(entry.vs.map(function(v){if(subgraphs[v]){return subgraphs[v].vs}return v}),true)})}function mergeBarycenters(target,other){if(!_.isUndefined(target.barycenter)){target.barycenter=(target.barycenter*target.weight+other.barycenter*other.weight)/(target.weight+other.weight);target.weight+=other.weight}else{target.barycenter=other.barycenter;target.weight=other.weight}}},{"../lodash":36,"./barycenter":40,"./resolve-conflicts":45,"./sort":47}],47:[function(require,module,exports){var _=require("../lodash"),util=require("../util");module.exports=sort;function sort(entries,biasRight){var parts=util.partition(entries,function(entry){return 
_.has(entry,"barycenter")});var sortable=parts.lhs,unsortable=_.sortBy(parts.rhs,function(entry){return-entry.i}),vs=[],sum=0,weight=0,vsIndex=0;sortable.sort(compareWithBias(!!biasRight));vsIndex=consumeUnsortable(vs,unsortable,vsIndex);_.each(sortable,function(entry){vsIndex+=entry.vs.length;vs.push(entry.vs);sum+=entry.barycenter*entry.weight;weight+=entry.weight;vsIndex=consumeUnsortable(vs,unsortable,vsIndex)});var result={vs:_.flatten(vs,true)};if(weight){result.barycenter=sum/weight;result.weight=weight}return result}function consumeUnsortable(vs,unsortable,index){var last;while(unsortable.length&&(last=_.last(unsortable)).i<=index){unsortable.pop();vs.push(last.vs);index++}return index}function compareWithBias(bias){return function(entryV,entryW){if(entryV.barycenterentryW.barycenter){return 1}return!bias?entryV.i-entryW.i:entryW.i-entryV.i}}},{"../lodash":36,"../util":55}],48:[function(require,module,exports){var _=require("./lodash");module.exports=parentDummyChains;function parentDummyChains(g){var postorderNums=postorder(g);_.each(g.graph().dummyChains,function(v){var node=g.node(v),edgeObj=node.edgeObj,pathData=findPath(g,postorderNums,edgeObj.v,edgeObj.w),path=pathData.path,lca=pathData.lca,pathIdx=0,pathV=path[pathIdx],ascending=true;while(v!==edgeObj.w){node=g.node(v);if(ascending){while((pathV=path[pathIdx])!==lca&&g.node(pathV).maxRanklow||lim>postorderNums[parent].lim));lca=parent;parent=w;while((parent=g.parent(parent))!==lca){wPath.push(parent)}return{path:vPath.concat(wPath.reverse()),lca:lca}}function postorder(g){var result={},lim=0;function dfs(v){var low=lim;_.each(g.children(v),dfs);result[v]={low:low,lim:lim++}}_.each(g.children(),dfs);return result}},{"./lodash":36}],49:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,util=require("../util");module.exports={positionX:positionX,findType1Conflicts:findType1Conflicts,findType2Conflicts:findType2Conflicts,addConflict:addConflict,hasConflict:hasConflict,verticalAlignment:verticalAlignment,horizontalCompaction:horizontalCompaction,alignCoordinates:alignCoordinates,findSmallestWidthAlignment:findSmallestWidthAlignment,balance:balance};function findType1Conflicts(g,layering){var conflicts={};function visitLayer(prevLayer,layer){var k0=0,scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.each(layer,function(v,i){var w=findOtherInnerSegmentNode(g,v),k1=w?g.node(w).order:prevLayerLength;if(w||v===lastNode){_.each(layer.slice(scanPos,i+1),function(scanNode){_.each(g.predecessors(scanNode),function(u){var uLabel=g.node(u),uPos=uLabel.order;if((uPosnextNorthBorder)){addConflict(conflicts,u,v)}})}})}function visitLayer(north,south){var prevNorthPos=-1,nextNorthPos,southPos=0;_.each(south,function(v,southLookahead){if(g.node(v).dummy==="border"){var predecessors=g.predecessors(v);if(predecessors.length){nextNorthPos=g.node(predecessors[0]).order;scan(south,southPos,southLookahead,prevNorthPos,nextNorthPos);southPos=southLookahead;prevNorthPos=nextNorthPos}}scan(south,southPos,south.length,nextNorthPos,north.length)});return south}_.reduce(layering,visitLayer);return conflicts}function findOtherInnerSegmentNode(g,v){if(g.node(v).dummy){return _.find(g.predecessors(v),function(u){return g.node(u).dummy})}}function addConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}var conflictsV=conflicts[v];if(!conflictsV){conflicts[v]=conflictsV={}}conflictsV[w]=true}function hasConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}return 
_.has(conflicts[v],w)}function verticalAlignment(g,layering,conflicts,neighborFn){var root={},align={},pos={};_.each(layering,function(layer){_.each(layer,function(v,order){root[v]=v;align[v]=v;pos[v]=order})});_.each(layering,function(layer){var prevIdx=-1;_.each(layer,function(v){var ws=neighborFn(v);if(ws.length){ws=_.sortBy(ws,function(w){return pos[w]});var mp=(ws.length-1)/2;for(var i=Math.floor(mp),il=Math.ceil(mp);i<=il;++i){var w=ws[i];if(align[v]===v&&prevIdxwLabel.lim){tailLabel=wLabel;flip=true}var candidates=_.filter(g.edges(),function(edge){return flip===isDescendant(t,t.node(edge.v),tailLabel)&&flip!==isDescendant(t,t.node(edge.w),tailLabel)});return _.min(candidates,function(edge){return slack(g,edge)})}function exchangeEdges(t,g,e,f){var v=e.v,w=e.w;t.removeEdge(v,w);t.setEdge(f.v,f.w,{});initLowLimValues(t);initCutValues(t,g);updateRanks(t,g)}function updateRanks(t,g){var root=_.find(t.nodes(),function(v){return!g.node(v).parent}),vs=preorder(t,root);vs=vs.slice(1);_.each(vs,function(v){var parent=t.node(v).parent,edge=g.edge(v,parent),flipped=false;if(!edge){edge=g.edge(parent,v);flipped=true}g.node(v).rank=g.node(parent).rank+(flipped?edge.minlen:-edge.minlen)})}function isTreeEdge(tree,u,v){return tree.hasEdge(u,v)}function isDescendant(tree,vLabel,rootLabel){return rootLabel.low<=vLabel.lim&&vLabel.lim<=rootLabel.lim}},{"../graphlib":33,"../lodash":36,"../util":55,"./feasible-tree":51,"./util":54}],54:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports={longestPath:longestPath,slack:slack};function longestPath(g){var visited={};function dfs(v){var label=g.node(v);if(_.has(visited,v)){return label.rank}visited[v]=true;var rank=_.min(_.map(g.outEdges(v),function(e){return dfs(e.w)-g.edge(e).minlen}));if(rank===Number.POSITIVE_INFINITY){rank=0}return label.rank=rank}_.each(g.sources(),dfs)}function slack(g,e){return g.node(e.w).rank-g.node(e.v).rank-g.edge(e).minlen}},{"../lodash":36}],55:[function(require,module,exports){"use strict";var _=require("./lodash"),Graph=require("./graphlib").Graph;module.exports={addDummyNode:addDummyNode,simplify:simplify,asNonCompoundGraph:asNonCompoundGraph,successorWeights:successorWeights,predecessorWeights:predecessorWeights,intersectRect:intersectRect,buildLayerMatrix:buildLayerMatrix,normalizeRanks:normalizeRanks,removeEmptyRanks:removeEmptyRanks,addBorderNode:addBorderNode,maxRank:maxRank,partition:partition,time:time,notime:notime};function addDummyNode(g,type,attrs,name){var v;do{v=_.uniqueId(name)}while(g.hasNode(v));attrs.dummy=type;g.setNode(v,attrs);return v}function simplify(g){var simplified=(new Graph).setGraph(g.graph());_.each(g.nodes(),function(v){simplified.setNode(v,g.node(v))});_.each(g.edges(),function(e){var simpleLabel=simplified.edge(e.v,e.w)||{weight:0,minlen:1},label=g.edge(e);simplified.setEdge(e.v,e.w,{weight:simpleLabel.weight+label.weight,minlen:Math.max(simpleLabel.minlen,label.minlen)})});return simplified}function asNonCompoundGraph(g){ -var simplified=new Graph({multigraph:g.isMultigraph()}).setGraph(g.graph());_.each(g.nodes(),function(v){if(!g.children(v).length){simplified.setNode(v,g.node(v))}});_.each(g.edges(),function(e){simplified.setEdge(e,g.edge(e))});return simplified}function successorWeights(g){var weightMap=_.map(g.nodes(),function(v){var sucs={};_.each(g.outEdges(v),function(e){sucs[e.w]=(sucs[e.w]||0)+g.edge(e).weight});return sucs});return _.zipObject(g.nodes(),weightMap)}function predecessorWeights(g){var weightMap=_.map(g.nodes(),function(v){var 
preds={};_.each(g.inEdges(v),function(e){preds[e.v]=(preds[e.v]||0)+g.edge(e).weight});return preds});return _.zipObject(g.nodes(),weightMap)}function intersectRect(rect,point){var x=rect.x;var y=rect.y;var dx=point.x-x;var dy=point.y-y;var w=rect.width/2;var h=rect.height/2;if(!dx&&!dy){throw new Error("Not possible to find intersection inside of the rectangle")}var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=w*dy/dx}return{x:x+sx,y:y+sy}}function buildLayerMatrix(g){var layering=_.map(_.range(maxRank(g)+1),function(){return[]});_.each(g.nodes(),function(v){var node=g.node(v),rank=node.rank;if(!_.isUndefined(rank)){layering[rank][node.order]=v}});return layering}function normalizeRanks(g){var min=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"rank")){node.rank-=min}})}function removeEmptyRanks(g){var offset=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));var layers=[];_.each(g.nodes(),function(v){var rank=g.node(v).rank-offset;if(!_.has(layers,rank)){layers[rank]=[]}layers[rank].push(v)});var delta=0,nodeRankFactor=g.graph().nodeRankFactor;_.each(layers,function(vs,i){if(_.isUndefined(vs)&&i%nodeRankFactor!==0){--delta}else if(delta){_.each(vs,function(v){g.node(v).rank+=delta})}})}function addBorderNode(g,prefix,rank,order){var node={width:0,height:0};if(arguments.length>=4){node.rank=rank;node.order=order}return addDummyNode(g,"border",node,prefix)}function maxRank(g){return _.max(_.map(g.nodes(),function(v){var rank=g.node(v).rank;if(!_.isUndefined(rank)){return rank}}))}function partition(collection,fn){var result={lhs:[],rhs:[]};_.each(collection,function(value){if(fn(value)){result.lhs.push(value)}else{result.rhs.push(value)}});return result}function time(name,fn){var start=_.now();try{return fn()}finally{console.log(name+" time: "+(_.now()-start)+"ms")}}function notime(name,fn){return fn()}},{"./graphlib":33,"./lodash":36}],56:[function(require,module,exports){module.exports="0.7.1"},{}],57:[function(require,module,exports){var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":73,"./lib/alg":64,"./lib/json":74}],58:[function(require,module,exports){var _=require("../lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{"../lodash":75}],59:[function(require,module,exports){var _=require("../lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{"../lodash":75}],60:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("../lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"../lodash":75,"./dijkstra":61}],61:[function(require,module,exports){var 
_=require("../lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. "+"Bad edge: "+edge+" Weight: "+weight)}if(distance0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":71,"../lodash":75}],62:[function(require,module,exports){var _=require("../lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"../lodash":75,"./tarjan":69}],63:[function(require,module,exports){var _=require("../lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":71,"../graph":72,"../lodash":75}],69:[function(require,module,exports){var _=require("../lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{"../lodash":75}],70:[function(require,module,exports){var _=require("../lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{"../lodash":75}],71:[function(require,module,exports){var _=require("../lodash");module.exports=PriorityQueue;function 
PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. "+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l>1;if(arr[parent].priority1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var 
predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return _.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(){var v,w,name,value,valueSpecified=false;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arguments[0];w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(!_.isUndefined(name)){name=""+name}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return _.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function 
edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{"./lodash":75}],73:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":72,"./version":76}],74:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":72,"./lodash":75}],75:[function(require,module,exports){arguments[4][20][0].apply(exports,arguments)},{dup:20,lodash:77}],76:[function(require,module,exports){module.exports="1.0.1"},{}],77:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f \ufeff"+"\n\r\u2028\u2029"+" ᠎              ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof 
module]&&module&&!module.nodeType&&module;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index-1?0:-1:cache?0:-1}function cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++indexother||typeof value=="undefined"){return 1}if(value/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return isObject(prototype)?nativeCreate(prototype):{}}if(!nativeCreate){baseCreate=function(){function Object(){}return function(prototype){if(isObject(prototype)){Object.prototype=prototype;var result=new Object;Object.prototype=null}return result||context.Object()}}()}function baseCreateCallback(func,thisArg,argCount){if(typeof func!="function"){return identity}if(typeof thisArg=="undefined"||!("prototype"in func)){return func}var bindData=func.__bindData__;if(typeof bindData=="undefined"){if(support.funcNames){bindData=!func.name}bindData=bindData||!support.funcDecomp;if(!bindData){var 
source=fnToString.call(func);if(!support.funcNames){bindData=!reFuncName.test(source)}if(!bindData){bindData=reThis.test(source);setBindData(func,bindData)}}}if(bindData===false||bindData!==true&&bindData[1]&1){return func}switch(argCount){case 1:return function(value){return func.call(thisArg,value)};case 2:return function(a,b){return func.call(thisArg,a,b)};case 3:return function(value,index,collection){return func.call(thisArg,value,index,collection)};case 4:return function(accumulator,value,index,collection){return func.call(thisArg,accumulator,value,index,collection)}}return bind(func,thisArg)}function baseCreateWrapper(bindData){var func=bindData[0],bitmask=bindData[1],partialArgs=bindData[2],partialRightArgs=bindData[3],thisArg=bindData[4],arity=bindData[5];var isBind=bitmask&1,isBindKey=bitmask&2,isCurry=bitmask&4,isCurryBound=bitmask&8,key=func;function bound(){var thisBinding=isBind?thisArg:this;if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(partialRightArgs||isCurry){args||(args=slice(arguments));if(partialRightArgs){push.apply(args,partialRightArgs)}if(isCurry&&args.length=largeArraySize&&indexOf===baseIndexOf,result=[];if(isLarge){var cache=createCache(values);if(cache){indexOf=cacheIndexOf;values=cache}else{isLarge=false}}while(++index-1}})}}stackA.pop();stackB.pop();if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseMerge(object,source,callback,stackA,stackB){(isArray(source)?forEach:forOwn)(source,function(source,key){var found,isArr,result=source,value=object[key];if(source&&((isArr=isArray(source))||isPlainObject(source))){var stackLength=stackA.length;while(stackLength--){if(found=stackA[stackLength]==source){value=stackB[stackLength];break}}if(!found){var isShallow;if(callback){result=callback(value,source);if(isShallow=typeof result!="undefined"){value=result}}if(!isShallow){value=isArr?isArray(value)?value:[]:isPlainObject(value)?value:{}}stackA.push(source);stackB.push(value);if(!isShallow){baseMerge(value,source,callback,stackA,stackB)}}}else{if(callback){result=callback(value,source);if(typeof result=="undefined"){result=source}}if(typeof result!="undefined"){value=result}}object[key]=value})}function baseRandom(min,max){return min+floor(nativeRandom()*(max-min+1))}function baseUniq(array,isSorted,callback){var index=-1,indexOf=getIndexOf(),length=array?array.length:0,result=[];var isLarge=!isSorted&&length>=largeArraySize&&indexOf===baseIndexOf,seen=callback||isLarge?getArray():result;if(isLarge){var cache=createCache(seen);indexOf=cacheIndexOf;seen=cache}while(++index":">",'"':""","'":"'"};var htmlUnescapes=invert(htmlEscapes);var reEscapedHtml=RegExp("("+keys(htmlUnescapes).join("|")+")","g"),reUnescapedHtml=RegExp("["+keys(htmlEscapes).join("")+"]","g");var assign=function(object,source,guard){var index,iterable=object,result=iterable;if(!iterable)return result;var args=arguments,argsIndex=0,argsLength=typeof guard=="number"?2:args.length;if(argsLength>3&&typeof args[argsLength-2]=="function"){var callback=baseCreateCallback(args[--argsLength-1],args[argsLength--],2)}else if(argsLength>2&&typeof args[argsLength-1]=="function"){callback=args[--argsLength]}while(++argsIndex3&&typeof args[length-2]=="function"){var callback=baseCreateCallback(args[--length-1],args[length--],2)}else if(length>2&&typeof args[length-1]=="function"){callback=args[--length]}var sources=slice(arguments,1,length),index=-1,stackA=getArray(),stackB=getArray();while(++index-1}else if(typeof 
length=="number"){result=(isString(collection)?collection.indexOf(target,fromIndex):indexOf(collection,target,fromIndex))>-1}else{forOwn(collection,function(value){if(++index>=fromIndex){return!(result=value===target)}})}return result}var countBy=createAggregator(function(result,value,key){hasOwnProperty.call(result,key)?result[key]++:result[key]=1});function every(collection,callback,thisArg){var result=true;callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++indexresult){result=value}}}else{callback=callback==null&&isString(collection)?charAtCallback:lodash.createCallback(callback,thisArg,3);forEach(collection,function(value,index,collection){var current=callback(value,index,collection);if(current>computed){computed=current;result=value}})}return result}function min(collection,callback,thisArg){var computed=Infinity,result=computed;if(typeof callback!="function"&&thisArg&&thisArg[callback]===collection){callback=null}if(callback==null&&isArray(collection)){var index=-1,length=collection.length;while(++index=largeArraySize&&createCache(argsIndex?args[argsIndex]:seen))}}var array=args[0],index=-1,length=array?array.length:0,result=[];outer:while(++index>>1;callback(array[mid])1?arguments:arguments[0],index=-1,length=array?max(pluck(array,"length")):0,result=Array(length<0?0:length);while(++index2?createWrapper(func,17,slice(arguments,2),null,thisArg):createWrapper(func,1,null,null,thisArg)}function bindAll(object){var funcs=arguments.length>1?baseFlatten(arguments,true,false,1):functions(object),index=-1,length=funcs.length;while(++index2?createWrapper(key,19,slice(arguments,2),null,object):createWrapper(key,3,null,null,object)}function compose(){var funcs=arguments,length=funcs.length;while(length--){if(!isFunction(funcs[length])){throw new TypeError}}return function(){var args=arguments,length=funcs.length;while(length--){args=[funcs[length].apply(this,args)]}return args[0]}}function curry(func,arity){arity=typeof arity=="number"?arity:+arity||func.length;return createWrapper(func,4,null,null,null,arity)}function debounce(func,wait,options){var args,maxTimeoutId,result,stamp,thisArg,timeoutId,trailingCall,lastCalled=0,maxWait=false,trailing=true;if(!isFunction(func)){throw new TypeError}wait=nativeMax(0,wait)||0;if(options===true){var leading=true;trailing=false}else if(isObject(options)){leading=options.leading;maxWait="maxWait"in options&&(nativeMax(wait,options.maxWait)||0);trailing="trailing"in options?options.trailing:trailing}var delayed=function(){var remaining=wait-(now()-stamp);if(remaining<=0){if(maxTimeoutId){clearTimeout(maxTimeoutId)}var isCalled=trailingCall;maxTimeoutId=timeoutId=trailingCall=undefined;if(isCalled){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}}else{timeoutId=setTimeout(delayed,remaining)}};var maxDelayed=function(){if(timeoutId){clearTimeout(timeoutId)}maxTimeoutId=timeoutId=trailingCall=undefined;if(trailing||maxWait!==wait){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}};return function(){args=arguments; +_.each(inputGraph.nodes(),function(v){var node=canonicalize(inputGraph.node(v));g.setNode(v,_.defaults(selectNumberAttrs(node,nodeNumAttrs),nodeDefaults));g.setParent(v,inputGraph.parent(v))});_.each(inputGraph.edges(),function(e){var 
edge=canonicalize(inputGraph.edge(e));g.setEdge(e,_.merge({},edgeDefaults,selectNumberAttrs(edge,edgeNumAttrs),_.pick(edge,edgeAttrs)))});return g}function makeSpaceForEdgeLabels(g){var graph=g.graph();graph.ranksep/=2;_.each(g.edges(),function(e){var edge=g.edge(e);edge.minlen*=2;if(edge.labelpos.toLowerCase()!=="c"){if(graph.rankdir==="TB"||graph.rankdir==="BT"){edge.width+=edge.labeloffset}else{edge.height+=edge.labeloffset}}})}function injectEdgeLabelProxies(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.width&&edge.height){var v=g.node(e.v),w=g.node(e.w),label={rank:(w.rank-v.rank)/2+v.rank,e:e};util.addDummyNode(g,"edge-proxy",label,"_ep")}})}function assignRankMinMax(g){var maxRank=0;_.each(g.nodes(),function(v){var node=g.node(v);if(node.borderTop){node.minRank=g.node(node.borderTop).rank;node.maxRank=g.node(node.borderBottom).rank;maxRank=_.max(maxRank,node.maxRank)}});g.graph().maxRank=maxRank}function removeEdgeLabelProxies(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="edge-proxy"){g.edge(node.e).labelRank=node.rank;g.removeNode(v)}})}function translateGraph(g){var minX=Number.POSITIVE_INFINITY,maxX=0,minY=Number.POSITIVE_INFINITY,maxY=0,graphLabel=g.graph(),marginX=graphLabel.marginx||0,marginY=graphLabel.marginy||0;function getExtremes(attrs){var x=attrs.x,y=attrs.y,w=attrs.width,h=attrs.height;minX=Math.min(minX,x-w/2);maxX=Math.max(maxX,x+w/2);minY=Math.min(minY,y-h/2);maxY=Math.max(maxY,y+h/2)}_.each(g.nodes(),function(v){getExtremes(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){getExtremes(edge)}});minX-=marginX;minY-=marginY;_.each(g.nodes(),function(v){var node=g.node(v);node.x-=minX;node.y-=minY});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,function(p){p.x-=minX;p.y-=minY});if(_.has(edge,"x")){edge.x-=minX}if(_.has(edge,"y")){edge.y-=minY}});graphLabel.width=maxX-minX+marginX;graphLabel.height=maxY-minY+marginY}function assignNodeIntersects(g){_.each(g.edges(),function(e){var edge=g.edge(e),nodeV=g.node(e.v),nodeW=g.node(e.w),p1,p2;if(!edge.points){edge.points=[];p1=nodeW;p2=nodeV}else{p1=edge.points[0];p2=edge.points[edge.points.length-1]}edge.points.unshift(util.intersectRect(nodeV,p1));edge.points.push(util.intersectRect(nodeW,p2))})}function fixupEdgeLabelCoords(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){if(edge.labelpos==="l"||edge.labelpos==="r"){edge.width-=edge.labeloffset}switch(edge.labelpos){case"l":edge.x-=edge.width/2+edge.labeloffset;break;case"r":edge.x+=edge.width/2+edge.labeloffset;break}}})}function reversePointsForReversedEdges(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.reversed){edge.points.reverse()}})}function removeBorderNodes(g){_.each(g.nodes(),function(v){if(g.children(v).length){var node=g.node(v),t=g.node(node.borderTop),b=g.node(node.borderBottom),l=g.node(_.last(node.borderLeft)),r=g.node(_.last(node.borderRight));node.width=Math.abs(r.x-l.x);node.height=Math.abs(b.y-t.y);node.x=l.x+node.width/2;node.y=t.y+node.height/2}});_.each(g.nodes(),function(v){if(g.node(v).dummy==="border"){g.removeNode(v)}})}function removeSelfEdges(g){_.each(g.edges(),function(e){if(e.v===e.w){var node=g.node(e.v);if(!node.selfEdges){node.selfEdges=[]}node.selfEdges.push({e:e,label:g.edge(e)});g.removeEdge(e)}})}function insertSelfEdges(g){var layers=util.buildLayerMatrix(g);_.each(layers,function(layer){var orderShift=0;_.each(layer,function(v,i){var 
node=g.node(v);node.order=i+orderShift;_.each(node.selfEdges,function(selfEdge){util.addDummyNode(g,"selfedge",{width:selfEdge.label.width,height:selfEdge.label.height,rank:node.rank,order:i+ ++orderShift,e:selfEdge.e,label:selfEdge.label},"_se")});delete node.selfEdges})})}function positionSelfEdges(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="selfedge"){var selfNode=g.node(node.e.v),x=selfNode.x+selfNode.width/2,y=selfNode.y,dx=node.x-x,dy=selfNode.height/2;g.setEdge(node.e,node.label);g.removeNode(v);node.label.points=[{x:x+2*dx/3,y:y-dy},{x:x+5*dx/6,y:y-dy},{x:x+dx,y:y},{x:x+5*dx/6,y:y+dy},{x:x+2*dx/3,y:y+dy}];node.label.x=node.x;node.label.y=node.y}})}function selectNumberAttrs(obj,attrs){return _.mapValues(_.pick(obj,attrs),Number)}function canonicalize(attrs){var newAttrs={};_.each(attrs,function(v,k){newAttrs[k.toLowerCase()]=v});return newAttrs}},{"./acyclic":28,"./add-border-segments":29,"./coordinate-system":30,"./graphlib":33,"./lodash":36,"./nesting-graph":37,"./normalize":38,"./order":43,"./parent-dummy-chains":48,"./position":50,"./rank":52,"./util":55}],36:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],37:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports={run:run,cleanup:cleanup};function run(g){var root=util.addDummyNode(g,"root",{},"_root"),depths=treeDepths(g),height=_.max(depths)-1,nodeSep=2*height+1;g.graph().nestingRoot=root;_.each(g.edges(),function(e){g.edge(e).minlen*=nodeSep});var weight=sumWeights(g)+1;_.each(g.children(),function(child){dfs(g,root,nodeSep,weight,height,depths,child)});g.graph().nodeRankFactor=nodeSep}function dfs(g,root,nodeSep,weight,height,depths,v){var children=g.children(v);if(!children.length){if(v!==root){g.setEdge(root,v,{weight:0,minlen:nodeSep})}return}var top=util.addBorderNode(g,"_bt"),bottom=util.addBorderNode(g,"_bb"),label=g.node(v);g.setParent(top,v);label.borderTop=top;g.setParent(bottom,v);label.borderBottom=bottom;_.each(children,function(child){dfs(g,root,nodeSep,weight,height,depths,child);var childNode=g.node(child),childTop=childNode.borderTop?childNode.borderTop:child,childBottom=childNode.borderBottom?childNode.borderBottom:child,thisWeight=childNode.borderTop?weight:2*weight,minlen=childTop!==childBottom?1:height-depths[v]+1;g.setEdge(top,childTop,{weight:thisWeight,minlen:minlen,nestingEdge:true});g.setEdge(childBottom,bottom,{weight:thisWeight,minlen:minlen,nestingEdge:true})});if(!g.parent(v)){g.setEdge(root,top,{weight:0,minlen:height+depths[v]})}}function treeDepths(g){var depths={};function dfs(v,depth){var children=g.children(v);if(children&&children.length){_.each(children,function(child){dfs(child,depth+1)})}depths[v]=depth}_.each(g.children(),function(v){dfs(v,1)});return depths}function sumWeights(g){return _.reduce(g.edges(),function(acc,e){return acc+g.edge(e).weight},0)}function cleanup(g){var graphLabel=g.graph();g.removeNode(graphLabel.nestingRoot);delete graphLabel.nestingRoot;_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.nestingEdge){g.removeEdge(e)}})}},{"./lodash":36,"./util":55}],38:[function(require,module,exports){"use strict";var _=require("./lodash"),util=require("./util");module.exports={run:run,undo:undo};function run(g){g.graph().dummyChains=[];_.each(g.edges(),function(edge){normalizeEdge(g,edge)})}function normalizeEdge(g,e){var 
v=e.v,vRank=g.node(v).rank,w=e.w,wRank=g.node(w).rank,name=e.name,edgeLabel=g.edge(e),labelRank=edgeLabel.labelRank;if(wRank===vRank+1)return;g.removeEdge(e);var dummy,attrs,i;for(i=0,++vRank;vRank0){if(index%2){weightSum+=tree[index+1]}index=index-1>>1;tree[index]+=entry.weight}cc+=entry.weight*weightSum}));return cc}},{"../lodash":36}],43:[function(require,module,exports){"use strict";var _=require("../lodash"),initOrder=require("./init-order"),crossCount=require("./cross-count"),sortSubgraph=require("./sort-subgraph"),buildLayerGraph=require("./build-layer-graph"),addSubgraphConstraints=require("./add-subgraph-constraints"),Graph=require("../graphlib").Graph,util=require("../util");module.exports=order;function order(g){var maxRank=util.maxRank(g),downLayerGraphs=buildLayerGraphs(g,_.range(1,maxRank+1),"inEdges"),upLayerGraphs=buildLayerGraphs(g,_.range(maxRank-1,-1,-1),"outEdges");var layering=initOrder(g);assignOrder(g,layering);var bestCC=Number.POSITIVE_INFINITY,best;for(var i=0,lastBest=0;lastBest<4;++i,++lastBest){sweepLayerGraphs(i%2?downLayerGraphs:upLayerGraphs,i%4>=2);layering=util.buildLayerMatrix(g);var cc=crossCount(g,layering);if(cc=vEntry.barycenter){mergeEntries(vEntry,uEntry)}}}function handleOut(vEntry){return function(wEntry){wEntry["in"].push(vEntry);if(--wEntry.indegree===0){sourceSet.push(wEntry)}}}while(sourceSet.length){var entry=sourceSet.pop();entries.push(entry);_.each(entry["in"].reverse(),handleIn(entry));_.each(entry.out,handleOut(entry))}return _.chain(entries).filter(function(entry){return!entry.merged}).map(function(entry){return _.pick(entry,["vs","i","barycenter","weight"])}).value()}function mergeEntries(target,source){var sum=0,weight=0;if(target.weight){sum+=target.barycenter*target.weight;weight+=target.weight}if(source.weight){sum+=source.barycenter*source.weight;weight+=source.weight}target.vs=source.vs.concat(target.vs);target.barycenter=sum/weight;target.weight=weight;target.i=Math.min(source.i,target.i);source.merged=true}},{"../lodash":36}],46:[function(require,module,exports){var _=require("../lodash"),barycenter=require("./barycenter"),resolveConflicts=require("./resolve-conflicts"),sort=require("./sort");module.exports=sortSubgraph;function sortSubgraph(g,v,cg,biasRight){var movable=g.children(v),node=g.node(v),bl=node?node.borderLeft:undefined,br=node?node.borderRight:undefined,subgraphs={};if(bl){movable=_.filter(movable,function(w){return w!==bl&&w!==br})}var barycenters=barycenter(g,movable);_.each(barycenters,function(entry){if(g.children(entry.v).length){var subgraphResult=sortSubgraph(g,entry.v,cg,biasRight);subgraphs[entry.v]=subgraphResult;if(_.has(subgraphResult,"barycenter")){mergeBarycenters(entry,subgraphResult)}}});var entries=resolveConflicts(barycenters,cg);expandSubgraphs(entries,subgraphs);var result=sort(entries,biasRight);if(bl){result.vs=_.flatten([bl,result.vs,br],true);if(g.predecessors(bl).length){var blPred=g.node(g.predecessors(bl)[0]),brPred=g.node(g.predecessors(br)[0]);if(!_.has(result,"barycenter")){result.barycenter=0;result.weight=0}result.barycenter=(result.barycenter*result.weight+blPred.order+brPred.order)/(result.weight+2);result.weight+=2}}return result}function expandSubgraphs(entries,subgraphs){_.each(entries,function(entry){entry.vs=_.flatten(entry.vs.map(function(v){if(subgraphs[v]){return subgraphs[v].vs}return v}),true)})}function 
mergeBarycenters(target,other){if(!_.isUndefined(target.barycenter)){target.barycenter=(target.barycenter*target.weight+other.barycenter*other.weight)/(target.weight+other.weight);target.weight+=other.weight}else{target.barycenter=other.barycenter;target.weight=other.weight}}},{"../lodash":36,"./barycenter":40,"./resolve-conflicts":45,"./sort":47}],47:[function(require,module,exports){var _=require("../lodash"),util=require("../util");module.exports=sort;function sort(entries,biasRight){var parts=util.partition(entries,function(entry){return _.has(entry,"barycenter")});var sortable=parts.lhs,unsortable=_.sortBy(parts.rhs,function(entry){return-entry.i}),vs=[],sum=0,weight=0,vsIndex=0;sortable.sort(compareWithBias(!!biasRight));vsIndex=consumeUnsortable(vs,unsortable,vsIndex);_.each(sortable,function(entry){vsIndex+=entry.vs.length;vs.push(entry.vs);sum+=entry.barycenter*entry.weight;weight+=entry.weight;vsIndex=consumeUnsortable(vs,unsortable,vsIndex)});var result={vs:_.flatten(vs,true)};if(weight){result.barycenter=sum/weight;result.weight=weight}return result}function consumeUnsortable(vs,unsortable,index){var last;while(unsortable.length&&(last=_.last(unsortable)).i<=index){unsortable.pop();vs.push(last.vs);index++}return index}function compareWithBias(bias){return function(entryV,entryW){if(entryV.barycenterentryW.barycenter){return 1}return!bias?entryV.i-entryW.i:entryW.i-entryV.i}}},{"../lodash":36,"../util":55}],48:[function(require,module,exports){var _=require("./lodash");module.exports=parentDummyChains;function parentDummyChains(g){var postorderNums=postorder(g);_.each(g.graph().dummyChains,function(v){var node=g.node(v),edgeObj=node.edgeObj,pathData=findPath(g,postorderNums,edgeObj.v,edgeObj.w),path=pathData.path,lca=pathData.lca,pathIdx=0,pathV=path[pathIdx],ascending=true;while(v!==edgeObj.w){node=g.node(v);if(ascending){while((pathV=path[pathIdx])!==lca&&g.node(pathV).maxRanklow||lim>postorderNums[parent].lim));lca=parent;parent=w;while((parent=g.parent(parent))!==lca){wPath.push(parent)}return{path:vPath.concat(wPath.reverse()),lca:lca}}function postorder(g){var result={},lim=0;function dfs(v){var low=lim;_.each(g.children(v),dfs);result[v]={low:low,lim:lim++}}_.each(g.children(),dfs);return result}},{"./lodash":36}],49:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,util=require("../util");module.exports={positionX:positionX,findType1Conflicts:findType1Conflicts,findType2Conflicts:findType2Conflicts,addConflict:addConflict,hasConflict:hasConflict,verticalAlignment:verticalAlignment,horizontalCompaction:horizontalCompaction,alignCoordinates:alignCoordinates,findSmallestWidthAlignment:findSmallestWidthAlignment,balance:balance};function findType1Conflicts(g,layering){var conflicts={};function visitLayer(prevLayer,layer){var k0=0,scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.each(layer,function(v,i){var w=findOtherInnerSegmentNode(g,v),k1=w?g.node(w).order:prevLayerLength;if(w||v===lastNode){_.each(layer.slice(scanPos,i+1),function(scanNode){_.each(g.predecessors(scanNode),function(u){var uLabel=g.node(u),uPos=uLabel.order;if((uPosnextNorthBorder)){addConflict(conflicts,u,v)}})}})}function visitLayer(north,south){var prevNorthPos=-1,nextNorthPos,southPos=0;_.each(south,function(v,southLookahead){if(g.node(v).dummy==="border"){var 
predecessors=g.predecessors(v);if(predecessors.length){nextNorthPos=g.node(predecessors[0]).order;scan(south,southPos,southLookahead,prevNorthPos,nextNorthPos);southPos=southLookahead;prevNorthPos=nextNorthPos}}scan(south,southPos,south.length,nextNorthPos,north.length)});return south}_.reduce(layering,visitLayer);return conflicts}function findOtherInnerSegmentNode(g,v){if(g.node(v).dummy){return _.find(g.predecessors(v),function(u){return g.node(u).dummy})}}function addConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}var conflictsV=conflicts[v];if(!conflictsV){conflicts[v]=conflictsV={}}conflictsV[w]=true}function hasConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}return _.has(conflicts[v],w)}function verticalAlignment(g,layering,conflicts,neighborFn){var root={},align={},pos={};_.each(layering,function(layer){_.each(layer,function(v,order){root[v]=v;align[v]=v;pos[v]=order})});_.each(layering,function(layer){var prevIdx=-1;_.each(layer,function(v){var ws=neighborFn(v);if(ws.length){ws=_.sortBy(ws,function(w){return pos[w]});var mp=(ws.length-1)/2;for(var i=Math.floor(mp),il=Math.ceil(mp);i<=il;++i){var w=ws[i];if(align[v]===v&&prevIdxwLabel.lim){tailLabel=wLabel;flip=true}var candidates=_.filter(g.edges(),function(edge){return flip===isDescendant(t,t.node(edge.v),tailLabel)&&flip!==isDescendant(t,t.node(edge.w),tailLabel)});return _.min(candidates,function(edge){return slack(g,edge)})}function exchangeEdges(t,g,e,f){var v=e.v,w=e.w;t.removeEdge(v,w);t.setEdge(f.v,f.w,{});initLowLimValues(t);initCutValues(t,g);updateRanks(t,g)}function updateRanks(t,g){var root=_.find(t.nodes(),function(v){return!g.node(v).parent}),vs=preorder(t,root);vs=vs.slice(1);_.each(vs,function(v){var parent=t.node(v).parent,edge=g.edge(v,parent),flipped=false;if(!edge){edge=g.edge(parent,v);flipped=true}g.node(v).rank=g.node(parent).rank+(flipped?edge.minlen:-edge.minlen)})}function isTreeEdge(tree,u,v){return tree.hasEdge(u,v)}function isDescendant(tree,vLabel,rootLabel){return rootLabel.low<=vLabel.lim&&vLabel.lim<=rootLabel.lim}},{"../graphlib":33,"../lodash":36,"../util":55,"./feasible-tree":51,"./util":54}],54:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports={longestPath:longestPath,slack:slack};function longestPath(g){var visited={};function dfs(v){var label=g.node(v);if(_.has(visited,v)){return label.rank}visited[v]=true;var rank=_.min(_.map(g.outEdges(v),function(e){return dfs(e.w)-g.edge(e).minlen}));if(rank===Number.POSITIVE_INFINITY){rank=0}return label.rank=rank}_.each(g.sources(),dfs)}function slack(g,e){return g.node(e.w).rank-g.node(e.v).rank-g.edge(e).minlen}},{"../lodash":36}],55:[function(require,module,exports){"use strict";var _=require("./lodash"),Graph=require("./graphlib").Graph;module.exports={addDummyNode:addDummyNode,simplify:simplify,asNonCompoundGraph:asNonCompoundGraph,successorWeights:successorWeights,predecessorWeights:predecessorWeights,intersectRect:intersectRect,buildLayerMatrix:buildLayerMatrix,normalizeRanks:normalizeRanks,removeEmptyRanks:removeEmptyRanks,addBorderNode:addBorderNode,maxRank:maxRank,partition:partition,time:time,notime:notime};function addDummyNode(g,type,attrs,name){var v; -stamp=now();thisArg=this;trailingCall=trailing&&(timeoutId||!leading);if(maxWait===false){var leadingCall=leading&&!timeoutId}else{if(!maxTimeoutId&&!leading){lastCalled=stamp}var 
remaining=maxWait-(stamp-lastCalled),isCalled=remaining<=0;if(isCalled){if(maxTimeoutId){maxTimeoutId=clearTimeout(maxTimeoutId)}lastCalled=stamp;result=func.apply(thisArg,args)}else if(!maxTimeoutId){maxTimeoutId=setTimeout(maxDelayed,remaining)}}if(isCalled&&timeoutId){timeoutId=clearTimeout(timeoutId)}else if(!timeoutId&&wait!==maxWait){timeoutId=setTimeout(delayed,wait)}if(leadingCall){isCalled=true;result=func.apply(thisArg,args)}if(isCalled&&!timeoutId&&!maxTimeoutId){args=thisArg=null}return result}}function defer(func){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,1);return setTimeout(function(){func.apply(undefined,args)},1)}function delay(func,wait){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,2);return setTimeout(function(){func.apply(undefined,args)},wait)}function memoize(func,resolver){if(!isFunction(func)){throw new TypeError}var memoized=function(){var cache=memoized.cache,key=resolver?resolver.apply(this,arguments):keyPrefix+arguments[0];return hasOwnProperty.call(cache,key)?cache[key]:cache[key]=func.apply(this,arguments)};memoized.cache={};return memoized}function once(func){var ran,result;if(!isFunction(func)){throw new TypeError}return function(){if(ran){return result}ran=true;result=func.apply(this,arguments);func=null;return result}}function partial(func){return createWrapper(func,16,slice(arguments,1))}function partialRight(func){return createWrapper(func,32,null,slice(arguments,1))}function throttle(func,wait,options){var leading=true,trailing=true;if(!isFunction(func)){throw new TypeError}if(options===false){leading=false}else if(isObject(options)){leading="leading"in options?options.leading:leading;trailing="trailing"in options?options.trailing:trailing}debounceOptions.leading=leading;debounceOptions.maxWait=wait;debounceOptions.trailing=trailing;return debounce(func,wait,debounceOptions)}function wrap(value,wrapper){return createWrapper(wrapper,16,[value])}function constant(value){return function(){return value}}function createCallback(func,thisArg,argCount){var type=typeof func;if(func==null||type=="function"){return baseCreateCallback(func,thisArg,argCount)}if(type!="object"){return property(func)}var props=keys(func),key=props[0],a=func[key];if(props.length==1&&a===a&&!isObject(a)){return function(object){var b=object[key];return a===b&&(a!==0||1/a==1/b)}}return function(object){var length=props.length,result=false;while(length--){if(!(result=baseIsEqual(object[props[length]],func[props[length]],null,true))){break}}return result}}function escape(string){return string==null?"":String(string).replace(reUnescapedHtml,escapeHtmlChar)}function identity(value){return value}function mixin(object,source,options){var chain=true,methodNames=source&&functions(source);if(!source||!options&&!methodNames.length){if(options==null){options=source}ctor=lodashWrapper;source=object;object=lodash;methodNames=functions(source)}if(options===false){chain=false}else if(isObject(options)&&"chain"in options){chain=options.chain}var ctor=object,isFunc=isFunction(ctor);forEach(methodNames,function(methodName){var func=object[methodName]=source[methodName];if(isFunc){ctor.prototype[methodName]=function(){var chainAll=this.__chain__,value=this.__wrapped__,args=[value];push.apply(args,arguments);var result=func.apply(object,args);if(chain||chainAll){if(value===result&&isObject(result)){return this}result=new ctor(result);result.__chain__=chainAll}return result}}})}function noConflict(){context._=oldDash;return this}function noop(){}var 
now=isNative(now=Date.now)&&now||function(){return(new Date).getTime()};var parseInt=nativeParseInt(whitespace+"08")==8?nativeParseInt:function(value,radix){return nativeParseInt(isString(value)?value.replace(reLeadingSpacesAndZeros,""):value,radix||0)};function property(key){return function(object){return object[key]}}function random(min,max,floating){var noMin=min==null,noMax=max==null;if(floating==null){if(typeof min=="boolean"&&noMax){floating=min;min=1}else if(!noMax&&typeof max=="boolean"){floating=max;noMax=true}}if(noMin&&noMax){max=1}min=+min||0;if(noMax){max=min;min=0}else{max=+max||0}if(floating||min%1||max%1){var rand=nativeRandom();return nativeMin(min+rand*(max-min+parseFloat("1e-"+((rand+"").length-1))),max)}return baseRandom(min,max)}function result(object,key){if(object){var value=object[key];return isFunction(value)?object[key]():value}}function template(text,data,options){var settings=lodash.templateSettings;text=String(text||"");options=defaults({},options,settings);var imports=defaults({},options.imports,settings.imports),importsKeys=keys(imports),importsValues=values(imports);var isEvaluating,index=0,interpolate=options.interpolate||reNoMatch,source="__p += '";var reDelimiters=RegExp((options.escape||reNoMatch).source+"|"+interpolate.source+"|"+(interpolate===reInterpolate?reEsTemplate:reNoMatch).source+"|"+(options.evaluate||reNoMatch).source+"|$","g");text.replace(reDelimiters,function(match,escapeValue,interpolateValue,esTemplateValue,evaluateValue,offset){interpolateValue||(interpolateValue=esTemplateValue);source+=text.slice(index,offset).replace(reUnescapedString,escapeStringChar);if(escapeValue){source+="' +\n__e("+escapeValue+") +\n'"}if(evaluateValue){isEvaluating=true;source+="';\n"+evaluateValue+";\n__p += '"}if(interpolateValue){source+="' +\n((__t = ("+interpolateValue+")) == null ? 
'' : __t) +\n'"}index=offset+match.length;return match});source+="';\n";var variable=options.variable,hasVariable=variable;if(!hasVariable){variable="obj";source="with ("+variable+") {\n"+source+"\n}\n"}source=(isEvaluating?source.replace(reEmptyStringLeading,""):source).replace(reEmptyStringMiddle,"$1").replace(reEmptyStringTrailing,"$1;");source="function("+variable+") {\n"+(hasVariable?"":variable+" || ("+variable+" = {});\n")+"var __t, __p = '', __e = _.escape"+(isEvaluating?", __j = Array.prototype.join;\n"+"function print() { __p += __j.call(arguments, '') }\n":";\n")+source+"return __p\n}";var sourceURL="\n/*\n//# sourceURL="+(options.sourceURL||"/lodash/template/source["+templateCounter++ +"]")+"\n*/";try{var result=Function(importsKeys,"return "+source+sourceURL).apply(undefined,importsValues)}catch(e){e.source=source;throw e}if(data){return result(data)}result.source=source;return result}function times(n,callback,thisArg){n=(n=+n)>-1?n:0;var index=-1,result=Array(n);callback=baseCreateCallback(callback,thisArg,1);while(++indexMath.abs(dx)*h){if(dy<0){h=-h}sx=h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=w*dy/dx}return{x:x+sx,y:y+sy}}function buildLayerMatrix(g){var layering=_.map(_.range(maxRank(g)+1),function(){return[]});_.each(g.nodes(),function(v){var node=g.node(v),rank=node.rank;if(!_.isUndefined(rank)){layering[rank][node.order]=v}});return layering}function normalizeRanks(g){var min=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"rank")){node.rank-=min}})}function removeEmptyRanks(g){var offset=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));var layers=[];_.each(g.nodes(),function(v){var rank=g.node(v).rank-offset;if(!_.has(layers,rank)){layers[rank]=[]}layers[rank].push(v)});var delta=0,nodeRankFactor=g.graph().nodeRankFactor;_.each(layers,function(vs,i){if(_.isUndefined(vs)&&i%nodeRankFactor!==0){--delta}else if(delta){_.each(vs,function(v){g.node(v).rank+=delta})}})}function addBorderNode(g,prefix,rank,order){var node={width:0,height:0};if(arguments.length>=4){node.rank=rank;node.order=order}return addDummyNode(g,"border",node,prefix)}function maxRank(g){return _.max(_.map(g.nodes(),function(v){var rank=g.node(v).rank;if(!_.isUndefined(rank)){return rank}}))}function partition(collection,fn){var result={lhs:[],rhs:[]};_.each(collection,function(value){if(fn(value)){result.lhs.push(value)}else{result.rhs.push(value)}});return result}function time(name,fn){var start=_.now();try{return fn()}finally{console.log(name+" time: "+(_.now()-start)+"ms")}}function notime(name,fn){return fn()}},{"./graphlib":33,"./lodash":36}],56:[function(require,module,exports){module.exports="0.7.1"},{}],57:[function(require,module,exports){var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":73,"./lib/alg":64,"./lib/json":74}],58:[function(require,module,exports){var _=require("../lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{"../lodash":75}],59:[function(require,module,exports){var _=require("../lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not 
have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{"../lodash":75}],60:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("../lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"../lodash":75,"./dijkstra":61}],61:[function(require,module,exports){var _=require("../lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. "+"Bad edge: "+edge+" Weight: "+weight)}if(distance0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":71,"../lodash":75}],62:[function(require,module,exports){var _=require("../lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"../lodash":75,"./tarjan":69}],63:[function(require,module,exports){var _=require("../lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":71,"../graph":72,"../lodash":75}],69:[function(require,module,exports){var _=require("../lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{"../lodash":75}],70:[function(require,module,exports){var 
_=require("../lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{"../lodash":75}],71:[function(require,module,exports){var _=require("../lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. "+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l>1;if(arr[parent].priority1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a 
cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return _.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(){var v,w,name,value,valueSpecified=false;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arguments[0];w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(!_.isUndefined(name)){name=""+name}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return 
_.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{"./lodash":75}],73:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":72,"./version":76}],74:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":72,"./lodash":75}],75:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],76:[function(require,module,exports){module.exports="1.0.1"},{}],77:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f \ufeff"+"\n\r\u2028\u2029"+" ᠎              ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var 
cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index-1?0:-1:cache?0:-1}function cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++indexother||typeof value=="undefined"){return 1}if(value/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return 
isObject(prototype)?nativeCreate(prototype):{}}if(!nativeCreate){baseCreate=function(){function Object(){}return function(prototype){if(isObject(prototype)){Object.prototype=prototype;var result=new Object;Object.prototype=null}return result||context.Object()}}()}function baseCreateCallback(func,thisArg,argCount){if(typeof func!="function"){return identity}if(typeof thisArg=="undefined"||!("prototype"in func)){return func}var bindData=func.__bindData__;if(typeof bindData=="undefined"){if(support.funcNames){bindData=!func.name}bindData=bindData||!support.funcDecomp;if(!bindData){var source=fnToString.call(func);if(!support.funcNames){bindData=!reFuncName.test(source)}if(!bindData){bindData=reThis.test(source);setBindData(func,bindData)}}}if(bindData===false||bindData!==true&&bindData[1]&1){return func}switch(argCount){case 1:return function(value){return func.call(thisArg,value)};case 2:return function(a,b){return func.call(thisArg,a,b)};case 3:return function(value,index,collection){return func.call(thisArg,value,index,collection)};case 4:return function(accumulator,value,index,collection){return func.call(thisArg,accumulator,value,index,collection)}}return bind(func,thisArg)}function baseCreateWrapper(bindData){var func=bindData[0],bitmask=bindData[1],partialArgs=bindData[2],partialRightArgs=bindData[3],thisArg=bindData[4],arity=bindData[5];var isBind=bitmask&1,isBindKey=bitmask&2,isCurry=bitmask&4,isCurryBound=bitmask&8,key=func;function bound(){var thisBinding=isBind?thisArg:this;if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(partialRightArgs||isCurry){args||(args=slice(arguments));if(partialRightArgs){push.apply(args,partialRightArgs)}if(isCurry&&args.length=largeArraySize&&indexOf===baseIndexOf,result=[]; + +if(isLarge){var cache=createCache(values);if(cache){indexOf=cacheIndexOf;values=cache}else{isLarge=false}}while(++index-1}})}}stackA.pop();stackB.pop();if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseMerge(object,source,callback,stackA,stackB){(isArray(source)?forEach:forOwn)(source,function(source,key){var found,isArr,result=source,value=object[key];if(source&&((isArr=isArray(source))||isPlainObject(source))){var stackLength=stackA.length;while(stackLength--){if(found=stackA[stackLength]==source){value=stackB[stackLength];break}}if(!found){var isShallow;if(callback){result=callback(value,source);if(isShallow=typeof result!="undefined"){value=result}}if(!isShallow){value=isArr?isArray(value)?value:[]:isPlainObject(value)?value:{}}stackA.push(source);stackB.push(value);if(!isShallow){baseMerge(value,source,callback,stackA,stackB)}}}else{if(callback){result=callback(value,source);if(typeof result=="undefined"){result=source}}if(typeof result!="undefined"){value=result}}object[key]=value})}function baseRandom(min,max){return min+floor(nativeRandom()*(max-min+1))}function baseUniq(array,isSorted,callback){var index=-1,indexOf=getIndexOf(),length=array?array.length:0,result=[];var isLarge=!isSorted&&length>=largeArraySize&&indexOf===baseIndexOf,seen=callback||isLarge?getArray():result;if(isLarge){var cache=createCache(seen);indexOf=cacheIndexOf;seen=cache}while(++index":">",'"':""","'":"'"};var htmlUnescapes=invert(htmlEscapes);var reEscapedHtml=RegExp("("+keys(htmlUnescapes).join("|")+")","g"),reUnescapedHtml=RegExp("["+keys(htmlEscapes).join("")+"]","g");var assign=function(object,source,guard){var index,iterable=object,result=iterable;if(!iterable)return result;var args=arguments,argsIndex=0,argsLength=typeof 
guard=="number"?2:args.length;if(argsLength>3&&typeof args[argsLength-2]=="function"){var callback=baseCreateCallback(args[--argsLength-1],args[argsLength--],2)}else if(argsLength>2&&typeof args[argsLength-1]=="function"){callback=args[--argsLength]}while(++argsIndex3&&typeof args[length-2]=="function"){var callback=baseCreateCallback(args[--length-1],args[length--],2)}else if(length>2&&typeof args[length-1]=="function"){callback=args[--length]}var sources=slice(arguments,1,length),index=-1,stackA=getArray(),stackB=getArray();while(++index-1}else if(typeof length=="number"){result=(isString(collection)?collection.indexOf(target,fromIndex):indexOf(collection,target,fromIndex))>-1}else{forOwn(collection,function(value){if(++index>=fromIndex){return!(result=value===target)}})}return result}var countBy=createAggregator(function(result,value,key){hasOwnProperty.call(result,key)?result[key]++:result[key]=1});function every(collection,callback,thisArg){var result=true;callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++indexresult){result=value}}}else{callback=callback==null&&isString(collection)?charAtCallback:lodash.createCallback(callback,thisArg,3);forEach(collection,function(value,index,collection){var current=callback(value,index,collection);if(current>computed){computed=current;result=value}})}return result}function min(collection,callback,thisArg){var computed=Infinity,result=computed;if(typeof callback!="function"&&thisArg&&thisArg[callback]===collection){callback=null}if(callback==null&&isArray(collection)){var index=-1,length=collection.length;while(++index=largeArraySize&&createCache(argsIndex?args[argsIndex]:seen))}}var array=args[0],index=-1,length=array?array.length:0,result=[];outer:while(++index>>1;callback(array[mid])1?arguments:arguments[0],index=-1,length=array?max(pluck(array,"length")):0,result=Array(length<0?0:length);while(++index2?createWrapper(func,17,slice(arguments,2),null,thisArg):createWrapper(func,1,null,null,thisArg)}function bindAll(object){var funcs=arguments.length>1?baseFlatten(arguments,true,false,1):functions(object),index=-1,length=funcs.length;while(++index2?createWrapper(key,19,slice(arguments,2),null,object):createWrapper(key,3,null,null,object)}function compose(){var funcs=arguments,length=funcs.length;while(length--){if(!isFunction(funcs[length])){throw new TypeError}}return function(){var args=arguments,length=funcs.length;while(length--){args=[funcs[length].apply(this,args)]}return args[0]}}function curry(func,arity){arity=typeof arity=="number"?arity:+arity||func.length;return createWrapper(func,4,null,null,null,arity)}function debounce(func,wait,options){var args,maxTimeoutId,result,stamp,thisArg,timeoutId,trailingCall,lastCalled=0,maxWait=false,trailing=true;if(!isFunction(func)){throw new TypeError}wait=nativeMax(0,wait)||0;if(options===true){var leading=true;trailing=false}else if(isObject(options)){leading=options.leading;maxWait="maxWait"in options&&(nativeMax(wait,options.maxWait)||0);trailing="trailing"in options?options.trailing:trailing}var delayed=function(){var remaining=wait-(now()-stamp);if(remaining<=0){if(maxTimeoutId){clearTimeout(maxTimeoutId); + +}var isCalled=trailingCall;maxTimeoutId=timeoutId=trailingCall=undefined;if(isCalled){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}}else{timeoutId=setTimeout(delayed,remaining)}};var 
maxDelayed=function(){if(timeoutId){clearTimeout(timeoutId)}maxTimeoutId=timeoutId=trailingCall=undefined;if(trailing||maxWait!==wait){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}};return function(){args=arguments;stamp=now();thisArg=this;trailingCall=trailing&&(timeoutId||!leading);if(maxWait===false){var leadingCall=leading&&!timeoutId}else{if(!maxTimeoutId&&!leading){lastCalled=stamp}var remaining=maxWait-(stamp-lastCalled),isCalled=remaining<=0;if(isCalled){if(maxTimeoutId){maxTimeoutId=clearTimeout(maxTimeoutId)}lastCalled=stamp;result=func.apply(thisArg,args)}else if(!maxTimeoutId){maxTimeoutId=setTimeout(maxDelayed,remaining)}}if(isCalled&&timeoutId){timeoutId=clearTimeout(timeoutId)}else if(!timeoutId&&wait!==maxWait){timeoutId=setTimeout(delayed,wait)}if(leadingCall){isCalled=true;result=func.apply(thisArg,args)}if(isCalled&&!timeoutId&&!maxTimeoutId){args=thisArg=null}return result}}function defer(func){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,1);return setTimeout(function(){func.apply(undefined,args)},1)}function delay(func,wait){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,2);return setTimeout(function(){func.apply(undefined,args)},wait)}function memoize(func,resolver){if(!isFunction(func)){throw new TypeError}var memoized=function(){var cache=memoized.cache,key=resolver?resolver.apply(this,arguments):keyPrefix+arguments[0];return hasOwnProperty.call(cache,key)?cache[key]:cache[key]=func.apply(this,arguments)};memoized.cache={};return memoized}function once(func){var ran,result;if(!isFunction(func)){throw new TypeError}return function(){if(ran){return result}ran=true;result=func.apply(this,arguments);func=null;return result}}function partial(func){return createWrapper(func,16,slice(arguments,1))}function partialRight(func){return createWrapper(func,32,null,slice(arguments,1))}function throttle(func,wait,options){var leading=true,trailing=true;if(!isFunction(func)){throw new TypeError}if(options===false){leading=false}else if(isObject(options)){leading="leading"in options?options.leading:leading;trailing="trailing"in options?options.trailing:trailing}debounceOptions.leading=leading;debounceOptions.maxWait=wait;debounceOptions.trailing=trailing;return debounce(func,wait,debounceOptions)}function wrap(value,wrapper){return createWrapper(wrapper,16,[value])}function constant(value){return function(){return value}}function createCallback(func,thisArg,argCount){var type=typeof func;if(func==null||type=="function"){return baseCreateCallback(func,thisArg,argCount)}if(type!="object"){return property(func)}var props=keys(func),key=props[0],a=func[key];if(props.length==1&&a===a&&!isObject(a)){return function(object){var b=object[key];return a===b&&(a!==0||1/a==1/b)}}return function(object){var length=props.length,result=false;while(length--){if(!(result=baseIsEqual(object[props[length]],func[props[length]],null,true))){break}}return result}}function escape(string){return string==null?"":String(string).replace(reUnescapedHtml,escapeHtmlChar)}function identity(value){return value}function mixin(object,source,options){var chain=true,methodNames=source&&functions(source);if(!source||!options&&!methodNames.length){if(options==null){options=source}ctor=lodashWrapper;source=object;object=lodash;methodNames=functions(source)}if(options===false){chain=false}else if(isObject(options)&&"chain"in options){chain=options.chain}var 
ctor=object,isFunc=isFunction(ctor);forEach(methodNames,function(methodName){var func=object[methodName]=source[methodName];if(isFunc){ctor.prototype[methodName]=function(){var chainAll=this.__chain__,value=this.__wrapped__,args=[value];push.apply(args,arguments);var result=func.apply(object,args);if(chain||chainAll){if(value===result&&isObject(result)){return this}result=new ctor(result);result.__chain__=chainAll}return result}}})}function noConflict(){context._=oldDash;return this}function noop(){}var now=isNative(now=Date.now)&&now||function(){return(new Date).getTime()};var parseInt=nativeParseInt(whitespace+"08")==8?nativeParseInt:function(value,radix){return nativeParseInt(isString(value)?value.replace(reLeadingSpacesAndZeros,""):value,radix||0)};function property(key){return function(object){return object[key]}}function random(min,max,floating){var noMin=min==null,noMax=max==null;if(floating==null){if(typeof min=="boolean"&&noMax){floating=min;min=1}else if(!noMax&&typeof max=="boolean"){floating=max;noMax=true}}if(noMin&&noMax){max=1}min=+min||0;if(noMax){max=min;min=0}else{max=+max||0}if(floating||min%1||max%1){var rand=nativeRandom();return nativeMin(min+rand*(max-min+parseFloat("1e-"+((rand+"").length-1))),max)}return baseRandom(min,max)}function result(object,key){if(object){var value=object[key];return isFunction(value)?object[key]():value}}function template(text,data,options){var settings=lodash.templateSettings;text=String(text||"");options=defaults({},options,settings);var imports=defaults({},options.imports,settings.imports),importsKeys=keys(imports),importsValues=values(imports);var isEvaluating,index=0,interpolate=options.interpolate||reNoMatch,source="__p += '";var reDelimiters=RegExp((options.escape||reNoMatch).source+"|"+interpolate.source+"|"+(interpolate===reInterpolate?reEsTemplate:reNoMatch).source+"|"+(options.evaluate||reNoMatch).source+"|$","g");text.replace(reDelimiters,function(match,escapeValue,interpolateValue,esTemplateValue,evaluateValue,offset){interpolateValue||(interpolateValue=esTemplateValue);source+=text.slice(index,offset).replace(reUnescapedString,escapeStringChar);if(escapeValue){source+="' +\n__e("+escapeValue+") +\n'"}if(evaluateValue){isEvaluating=true;source+="';\n"+evaluateValue+";\n__p += '"}if(interpolateValue){source+="' +\n((__t = ("+interpolateValue+")) == null ? 
'' : __t) +\n'"}index=offset+match.length;return match});source+="';\n";var variable=options.variable,hasVariable=variable;if(!hasVariable){variable="obj";source="with ("+variable+") {\n"+source+"\n}\n"}source=(isEvaluating?source.replace(reEmptyStringLeading,""):source).replace(reEmptyStringMiddle,"$1").replace(reEmptyStringTrailing,"$1;");source="function("+variable+") {\n"+(hasVariable?"":variable+" || ("+variable+" = {});\n")+"var __t, __p = '', __e = _.escape"+(isEvaluating?", __j = Array.prototype.join;\n"+"function print() { __p += __j.call(arguments, '') }\n":";\n")+source+"return __p\n}";var sourceURL="\n/*\n//# sourceURL="+(options.sourceURL||"/lodash/template/source["+templateCounter++ +"]")+"\n*/";try{var result=Function(importsKeys,"return "+source+sourceURL).apply(undefined,importsValues)}catch(e){e.source=source;throw e}if(data){return result(data)}result.source=source;return result}function times(n,callback,thisArg){n=(n=+n)>-1?n:0;var index=-1,result=Array(n);callback=baseCreateCallback(callback,thisArg,1);while(++index 0) { - // Take into account the position and width of the last stage's container - var existingStages = stageClusters(); + var existingStages = svgContainer + .selectAll("g.cluster") + .filter("[id*=\"" + VizConstants.stageClusterPrefix + "\"]"); if (!existingStages.empty()) { - var lastStage = existingStages[0].pop(); - var lastStageId = d3.select(lastStage).attr("id"); - var lastStageWidth = toFloat(d3.select("#" + lastStageId + " rect").attr("width")); - var lastStagePosition = getAbsolutePosition(lastStageId); + var lastStage = d3.select(existingStages[0].pop()); + var lastStageId = lastStage.attr("id"); + var lastStageWidth = toFloat(svgContainer + .select("#" + lastStageId) + .select("rect") + .attr("width")); + var lastStagePosition = getAbsolutePosition(lastStage); var offset = lastStagePosition.x + lastStageWidth + VizConstants.stageSep; container.attr("transform", "translate(" + offset + ", 0)"); } } + + // Actually render the stage renderDot(dot, container); + // If there are any incoming edges into this graph, keep track of them to render // them separately later. Note that we cannot draw them now because we need to // put these edges in a separate container that is on top of all stage graphs. @@ -221,15 +218,7 @@ function renderDagVizForJob(svgContainer) { }); }); - // Draw edges that cross stages - if (crossStageEdges.length > 0) { - var container = svgContainer.append("g").attr("id", "cross-stage-edges"); - for (var i = 0; i < crossStageEdges.length; i++) { - var fromRDDId = crossStageEdges[i][0]; - var toRDDId = crossStageEdges[i][1]; - connectRDDs(fromRDDId, toRDDId, container); - } - } + drawCrossStageEdges(crossStageEdges, svgContainer); } /* Render the dot file as an SVG in the given container. */ @@ -243,99 +232,156 @@ function renderDot(dot, container) { renderer(container, g); } -/* Style the visualization we just rendered. 
*/
-function styleDagViz(forJob) {
- graphContainer().selectAll("svg g.cluster rect")
- .style("fill", "white")
- .style("stroke", VizConstants.rddOperationColor)
- .style("stroke-width", "4px")
- .style("stroke-opacity", "0.5");
- graphContainer().selectAll("svg g.cluster text")
- .attr("fill", VizConstants.clusterLabelColor)
- .attr("font-size", "11px");
- graphContainer().selectAll("svg path")
- .style("stroke", VizConstants.edgeColor)
- .style("stroke-width", VizConstants.edgeWidth);
- stageClusters()
- .select("rect")
- .style("stroke", VizConstants.stageColor)
- .style("strokeWidth", "6px");
-
- // Put an arrow at the end of every edge
- // We need to do this because we manually render some edges ourselves
- // For these edges, we borrow the arrow marker generated by dagre-d3
- var dagreD3Marker = graphContainer().select("svg g.edgePaths marker").node();
- graphContainer().select("svg")
- .append(function() { return dagreD3Marker.cloneNode(true); })
- .attr("id", "marker-arrow")
- .select("path")
- .attr("fill", VizConstants.edgeColor)
- .attr("strokeWidth", "0px");
- graphContainer().selectAll("svg g > path").attr("marker-end", "url(#marker-arrow)");
- graphContainer().selectAll("svg g.edgePaths def").remove(); // We no longer need these
-
- // Apply any job or stage specific styles
+/* -------------------- *
+ * | Helper functions | *
+ * -------------------- */
+
+// Helper d3 accessors
+function graphContainer() { return d3.select("#dag-viz-graph"); }
+function metadataContainer() { return d3.select("#dag-viz-metadata"); }
+
+/*
+ * Helper function to draw a label for each cluster.
+ *
+ * We need to do this manually because dagre-d3 does not support labeling clusters.
+ * In general, the clustering support for dagre-d3 is quite limited at this point.
+ */
+function drawClusterLabels(svgContainer, forJob) {
 if (forJob) {
- styleDagVizForJob();
+ var clusterLabelSize = JobPageVizConstants.clusterLabelSize;
+ var stageClusterLabelSize = JobPageVizConstants.stageClusterLabelSize;
 } else {
- styleDagVizForStage();
+ var clusterLabelSize = StagePageVizConstants.clusterLabelSize;
+ var stageClusterLabelSize = StagePageVizConstants.stageClusterLabelSize;
 }
+ svgContainer.selectAll("g.cluster").each(function() {
+ var cluster = d3.select(this);
+ var isStage = cluster.attr("id").indexOf(VizConstants.stageClusterPrefix) > -1;
+ var labelSize = isStage ? stageClusterLabelSize : clusterLabelSize;
+ drawClusterLabel(cluster, labelSize);
+ });
 }
-/* Apply job-page-specific style to the visualization. */
-function styleDagVizForJob() {
- graphContainer().selectAll("svg g.node circle")
- .style("fill", VizConstants.rddColor);
- // TODO: add a legend to explain what a highlighted dot means
- graphContainer().selectAll("svg g.cached circle")
- .style("fill", VizConstants.rddCachedColor);
- graphContainer().selectAll("svg g#cross-stage-edges path")
- .style("fill", "none");
+/*
+ * Helper function to draw a label for the given cluster element based on its name.
+ *
+ * In the process, we need to expand the bounding box to make room for the label.
+ * We need to do this because dagre-d3 did not take this into account when it first
+ * rendered the bounding boxes. Note that this means we need to adjust the view box
+ * of the SVG afterwards since we shifted a few boxes around.
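+ * (The view box is recomputed in the resizeSvg helper defined below.)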
+ */
+function drawClusterLabel(d3cluster, fontSize) {
+ var cluster = d3cluster;
+ var rect = d3cluster.select("rect");
+ rect.attr("y", toFloat(rect.attr("y")) - fontSize);
+ rect.attr("height", toFloat(rect.attr("height")) + fontSize);
+ var labelX = toFloat(rect.attr("x")) + toFloat(rect.attr("width")) - fontSize / 2;
+ var labelY = toFloat(rect.attr("y")) + fontSize * 1.5;
+ var labelText = cluster.attr("name").replace(VizConstants.clusterPrefix, "");
+ cluster.append("text")
+ .attr("x", labelX)
+ .attr("y", labelY)
+ .attr("text-anchor", "end")
+ .style("font-size", fontSize)
+ .text(labelText);
 }
-/* Apply stage-page-specific style to the visualization. */
-function styleDagVizForStage() {
- graphContainer().selectAll("svg g.node rect")
- .style("fill", "none")
- .style("stroke", VizConstants.rddColor)
- .style("stroke-width", "2px")
- .attr("rx", "5") // round corners
- .attr("ry", "5");
- // TODO: add a legend to explain what a highlighted RDD means
- graphContainer().selectAll("svg g.cached rect")
- .style("stroke", VizConstants.rddCachedColor);
- graphContainer().selectAll("svg g.node g.label text tspan")
- .style("fill", VizConstants.rddColor);
+/*
+ * Helper function to size the SVG appropriately such that all elements are displayed.
+ * This assumes that all outermost elements are clusters (rectangles).
+ */
+function resizeSvg(svg) {
+ var allClusters = svg.selectAll("g.cluster rect")[0];
+ var startX = -VizConstants.svgMarginX +
+ toFloat(d3.min(allClusters, function(e) {
+ return getAbsolutePosition(d3.select(e)).x;
+ }));
+ var startY = -VizConstants.svgMarginY +
+ toFloat(d3.min(allClusters, function(e) {
+ return getAbsolutePosition(d3.select(e)).y;
+ }));
+ var endX = VizConstants.svgMarginX +
+ toFloat(d3.max(allClusters, function(e) {
+ var t = d3.select(e);
+ return getAbsolutePosition(t).x + toFloat(t.attr("width"));
+ }));
+ var endY = VizConstants.svgMarginY +
+ toFloat(d3.max(allClusters, function(e) {
+ var t = d3.select(e);
+ return getAbsolutePosition(t).y + toFloat(t.attr("height"));
+ }));
+ var width = endX - startX;
+ var height = endY - startY;
+ svg.attr("viewBox", startX + " " + startY + " " + width + " " + height)
+ .attr("width", width)
+ .attr("height", height);
 }
 /*
- * (Job page only) Helper method to compute the absolute
- * position of the group element identified by the given ID.
+ * (Job page only) Helper function to draw edges that cross stage boundaries.
+ * We need to do this manually because we render each stage separately in dagre-d3.
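+ * The edges are drawn last, in their own container, so that they end up on top
+ * of all the stage graphs.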
*/
-function getAbsolutePosition(groupId) {
- var obj = d3.select("#" + groupId).filter("g");
- var _x = 0, _y = 0;
+function drawCrossStageEdges(edges, svgContainer) {
+ if (edges.length == 0) {
+ return;
+ }
+ // Draw the paths first
+ var edgesContainer = svgContainer.append("g").attr("id", "cross-stage-edges");
+ for (var i = 0; i < edges.length; i++) {
+ var fromRDDId = edges[i][0];
+ var toRDDId = edges[i][1];
+ connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer);
+ }
+ // Now draw the arrows by borrowing the arrow marker generated by dagre-d3.
+ // Note: .node() returns a raw DOM element (or null), not a d3 selection,
+ // so we null-check it rather than calling the selection method .empty().
+ var dagreD3Marker = svgContainer.select("g.edgePaths marker").node();
+ if (dagreD3Marker) {
+ svgContainer
+ .append(function() { return dagreD3Marker.cloneNode(true); })
+ .attr("id", "marker-arrow");
+ svgContainer.selectAll("g > path").attr("marker-end", "url(#marker-arrow)");
+ svgContainer.selectAll("g.edgePaths def").remove(); // We no longer need these
+ }
+}
+
+/*
+ * (Job page only) Helper function to compute the absolute
+ * position of the specified element in our graph.
+ */
+function getAbsolutePosition(d3selection) {
+ if (d3selection.empty()) {
+ throw "Attempted to get absolute position of an empty selection.";
+ }
+ var obj = d3selection;
+ var _x = toFloat(obj.attr("x")) || 0;
+ var _y = toFloat(obj.attr("y")) || 0;
 while (!obj.empty()) {
 var transformText = obj.attr("transform");
- var translate = d3.transform(transformText).translate
- _x += translate[0];
- _y += translate[1];
- obj = d3.select(obj.node().parentNode).filter("g")
+ if (transformText) {
+ var translate = d3.transform(transformText).translate;
+ _x += toFloat(translate[0]);
+ _y += toFloat(translate[1]);
+ }
+ // Climb upwards to find how our parents are translated
+ obj = d3.select(obj.node().parentNode);
+ // Stop when we've reached the graph container itself
+ if (obj.node() == graphContainer().node()) {
+ break;
+ }
 }
 return { x: _x, y: _y };
 }
-/* (Job page only) Connect two RDD nodes with a curved edge. */
-function connectRDDs(fromRDDId, toRDDId, container) {
+/* (Job page only) Helper function to connect two RDDs with a curved edge. */
+function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) {
 var fromNodeId = VizConstants.nodePrefix + fromRDDId;
- var toNodeId = VizConstants.nodePrefix + toRDDId
- var fromPos = getAbsolutePosition(fromNodeId);
- var toPos = getAbsolutePosition(toNodeId);
+ var toNodeId = VizConstants.nodePrefix + toRDDId;
+ var fromPos = getAbsolutePosition(svgContainer.select("#" + fromNodeId));
+ var toPos = getAbsolutePosition(svgContainer.select("#" + toNodeId));
 // On the job page, RDDs are rendered as dots (circles). When rendering the path,
 // we need to account for the radii of these circles. Otherwise the arrow heads
 // will bleed into the circle itself.
- var delta = toFloat(graphContainer()
+ var delta = toFloat(svgContainer
 .select("g.node#" + toNodeId)
 .select("circle")
 .attr("r"));
@@ -375,18 +421,15 @@ function connectRDDs(fromRDDId, toRDDId, container) {
 }
 var line = d3.svg.line().interpolate("basis");
- container.append("path").datum(points).attr("d", line);
+ edgesContainer.append("path").datum(points).attr("d", line);
 }
-/* Helper d3 accessor to clusters that represent stages. */
-function stageClusters() {
- return graphContainer().selectAll("g.cluster").filter(function() {
- return d3.select(this).attr("id").indexOf(VizConstants.stageClusterPrefix) > -1;
- });
-}
-
-/* Helper method to convert attributes to numeric values.
*/ +/* Helper function to convert attributes to numeric values. */ function toFloat(f) { - return parseFloat(f.replace(/px$/, "")); + if (f) { + return parseFloat(f.toString().replace(/px$/, "")); + } else { + return f; + } } diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 2f3fb181e4026..e2d03f8342315 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -156,13 +156,10 @@ private[spark] object UIUtils extends Logging { def commonHeaderNodes: Seq[Node] = { - - - - + + + + @@ -174,6 +171,7 @@ private[spark] object UIUtils extends Logging { } def vizHeaderNodes: Seq[Node] = { + @@ -358,7 +356,7 @@ private[spark] object UIUtils extends Logging {
{ graphs.map { g =>
-
+## ElementwiseProduct
+
+ElementwiseProduct multiplies each input vector by a provided "weight" vector, using element-wise multiplication. In other words, it scales each column of the dataset by a scalar multiplier. This represents the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_%28matrices%29) between the input vector `v` and the transforming vector `w`, yielding a result vector.
+
+`\[ \begin{pmatrix}
+v_1 \\
+\vdots \\
+v_N
+\end{pmatrix} \circ \begin{pmatrix}
+ w_1 \\
+ \vdots \\
+ w_N
+ \end{pmatrix}
+= \begin{pmatrix}
+ v_1 w_1 \\
+ \vdots \\
+ v_N w_N
+ \end{pmatrix}
+\]`
+
+[`ElementwiseProduct`](api/scala/index.html#org.apache.spark.mllib.feature.ElementwiseProduct) has the following parameter in the constructor:
+
+* `w`: the transforming vector.
+
+`ElementwiseProduct` implements [`VectorTransformer`](api/scala/index.html#org.apache.spark.mllib.feature.VectorTransformer), which can apply the weighting to a single `Vector` to produce a transformed `Vector`, or to an `RDD[Vector]` to produce a transformed `RDD[Vector]`.
+
+### Example
+
+The example below demonstrates how to load a simple vectors file, extract a set of vectors, and then transform those vectors using a transforming vector.
+
+<div class="codetabs">
+<div data-lang="scala" markdown="1">
+{% highlight scala %}
+import org.apache.spark.SparkContext._
+import org.apache.spark.mllib.feature.ElementwiseProduct
+import org.apache.spark.mllib.linalg.Vectors
+
+// Load and parse the data:
+val data = sc.textFile("data/mllib/kmeans_data.txt")
+val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble)))
+
+val transformingVector = Vectors.dense(0.0, 1.0, 2.0)
+val transformer = new ElementwiseProduct(transformingVector)
+
+// Batch transform and per-row transform give the same results:
+val transformedData = transformer.transform(parsedData)
+val transformedData2 = parsedData.map(x => transformer.transform(x))
+
+{% endhighlight %}
+</div>
+</div>
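+The same patch also adds a `spark.ml` wrapper, `org.apache.spark.ml.feature.ElementwiseProduct` (see the new file below). As a sketch of how that wrapper would be used on a DataFrame with a vector column, assuming the standard `UnaryTransformer` column setters (`setInputCol`/`setOutputCol`), which the doc above does not show, and a `sqlContext` in scope:
+
+{% highlight scala %}
+import org.apache.spark.ml.feature.ElementwiseProduct
+import org.apache.spark.mllib.linalg.Vectors
+
+// Assumed setup: a SQLContext named sqlContext, as the other examples assume sc.
+val dataset = sqlContext.createDataFrame(Seq(
+  (0, Vectors.dense(1.0, 2.0, 3.0))
+)).toDF("id", "features")
+
+val transformer = new ElementwiseProduct()
+  .setScalingVec(Vectors.dense(0.0, 1.0, 2.0))
+  .setInputCol("features")
+  .setOutputCol("scaledFeatures")
+
+// Each "features" vector is multiplied element-wise by the scaling vector.
+transformer.transform(dataset).show()
+{% endhighlight %}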
+ + diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala new file mode 100644 index 0000000000000..f8b56293e3ccc --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.ml.UnaryTransformer +import org.apache.spark.ml.param.Param +import org.apache.spark.mllib.feature +import org.apache.spark.mllib.linalg.{Vector, VectorUDT} +import org.apache.spark.sql.types.DataType + +/** + * :: AlphaComponent :: + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a + * provided "weight" vector. In other words, it scales each column of the dataset by a scalar + * multiplier. + */ +@AlphaComponent +class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { + + /** + * the vector to multiply with input vectors + * @group param + */ + val scalingVec: Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product") + + /** @group setParam */ + def setScalingVec(value: Vector): this.type = set(scalingVec, value) + + /** @group getParam */ + def getScalingVec: Vector = getOrDefault(scalingVec) + + override protected def createTransformFunc: Vector => Vector = { + require(params.contains(scalingVec), s"transformation requires a weight vector") + val elemScaler = new feature.ElementwiseProduct($(scalingVec)) + elemScaler.transform + } + + override protected def outputDataType: DataType = new VectorUDT() +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala new file mode 100644 index 0000000000000..b0985baf9b278 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.feature + +import org.apache.spark.annotation.Experimental +import org.apache.spark.mllib.linalg._ + +/** + * :: Experimental :: + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a + * provided "weight" vector. In other words, it scales each column of the dataset by a scalar + * multiplier. + * @param scalingVector The values used to scale the reference vector's individual components. + */ +@Experimental +class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { + + /** + * Does the hadamard product transformation. + * + * @param vector vector to be transformed. + * @return transformed vector. + */ + override def transform(vector: Vector): Vector = { + require(vector.size == scalingVector.size, + s"vector sizes do not match: Expected ${scalingVector.size} but found ${vector.size}") + vector match { + case dv: DenseVector => + val values: Array[Double] = dv.values.clone() + val dim = scalingVector.size + var i = 0 + while (i < dim) { + values(i) *= scalingVector(i) + i += 1 + } + Vectors.dense(values) + case SparseVector(size, indices, vs) => + val values = vs.clone() + val dim = values.length + var i = 0 + while (i < dim) { + values(i) *= scalingVector(indices(i)) + i += 1 + } + Vectors.sparse(size, indices, values) + case v => throw new IllegalArgumentException("Does not support vector type " + v.getClass) + } + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala new file mode 100644 index 0000000000000..f3a482abda873 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.feature + +import org.scalatest.FunSuite + +import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vectors} +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ + +class ElementwiseProductSuite extends FunSuite with MLlibTestSparkContext { + + test("elementwise (hadamard) product should properly apply vector to dense data set") { + val denseData = Array( + Vectors.dense(1.0, 4.0, 1.9, -9.0) + ) + val scalingVec = Vectors.dense(2.0, 0.5, 0.0, 0.25) + val transformer = new ElementwiseProduct(scalingVec) + val transformedData = transformer.transform(sc.makeRDD(denseData)) + val transformedVecs = transformedData.collect() + val transformedVec = transformedVecs(0) + val expectedVec = Vectors.dense(2.0, 2.0, 0.0, -2.25) + assert(transformedVec ~== expectedVec absTol 1E-5, + s"Expected transformed vector $expectedVec but found $transformedVec") + } + + test("elementwise (hadamard) product should properly apply vector to sparse data set") { + val sparseData = Array( + Vectors.sparse(3, Seq((1, -1.0), (2, -3.0))) + ) + val dataRDD = sc.parallelize(sparseData, 3) + val scalingVec = Vectors.dense(1.0, 0.0, 0.5) + val transformer = new ElementwiseProduct(scalingVec) + val data2 = sparseData.map(transformer.transform) + val data2RDD = transformer.transform(dataRDD) + + assert((sparseData, data2, data2RDD.collect()).zipped.forall { + case (v1: DenseVector, v2: DenseVector, v3: DenseVector) => true + case (v1: SparseVector, v2: SparseVector, v3: SparseVector) => true + case _ => false + }, "The vector type should be preserved after hadamard product") + + assert((data2, data2RDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) + assert(data2(0) ~== Vectors.sparse(3, Seq((1, 0.0), (2, -1.5))) absTol 1E-5) + } +} From e43803b8f477b2c8d28836ac163cb54328d13f1a Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Thu, 7 May 2015 15:45:37 -0700 Subject: [PATCH 31/48] [SPARK-6948] [MLLIB] compress vectors in VectorAssembler The compression is based on storage. 
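For context, a minimal sketch of what storage-based compression means here, assuming only the `compressed` helper on MLlib vectors that this patch relies on: it keeps whichever of the dense and sparse encodings of a vector is smaller.

{% highlight scala %}
import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vectors}

// Mostly zeros: the sparse encoding (indices plus values) is smaller,
// so compression keeps a SparseVector.
val mostlyZero = Vectors.dense(0.0, 0.0, 0.0, 4.0).compressed
assert(mostlyZero.isInstanceOf[SparseVector])

// No zeros at all: a plain value array is smaller, so the same call
// returns a DenseVector instead.
val allNonzero = Vectors.sparse(4, Array(0, 1, 2, 3), Array(1.0, 2.0, 3.0, 4.0)).compressed
assert(allNonzero.isInstanceOf[DenseVector])
{% endhighlight %}

This is the same dense-in/sparse-out and sparse-in/dense-out contract asserted by the new `VectorAssemblerSuite` test and the updated Python doctests below.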
brkyvz

Author: Xiangrui Meng

Closes #5985 from mengxr/SPARK-6948 and squashes the following commits:

df56a00 [Xiangrui Meng] update python tests
6d90d45 [Xiangrui Meng] compress vectors in VectorAssembler
---
 .../org/apache/spark/ml/feature/VectorAssembler.scala | 2 +-
 .../apache/spark/ml/feature/VectorAssemblerSuite.scala | 10 +++++++++-
 python/pyspark/ml/feature.py | 6 +++---
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
index b5a69cee6daf3..796758a70ef18 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
@@ -102,6 +102,6 @@ object VectorAssembler {
 case o => throw new SparkException(s"$o of type ${o.getClass.getName} is not supported.")
 }
- Vectors.sparse(cur, indices.result(), values.result())
+ Vectors.sparse(cur, indices.result(), values.result()).compressed
 }
 }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
index 57d0278e03639..0db27607bc274 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
 import org.scalatest.FunSuite
 import org.apache.spark.SparkException
-import org.apache.spark.mllib.linalg.{Vector, Vectors}
+import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.sql.{Row, SQLContext}
@@ -48,6 +48,14 @@ class VectorAssemblerSuite extends FunSuite with MLlibTestSparkContext {
 }
 }
+ test("assemble should compress vectors") {
+ import org.apache.spark.ml.feature.VectorAssembler.assemble
+ val v1 = assemble(0.0, 0.0, 0.0, Vectors.dense(4.0))
+ assert(v1.isInstanceOf[SparseVector])
+ val v2 = assemble(1.0, 2.0, 3.0, Vectors.sparse(1, Array(0), Array(4.0)))
+ assert(v2.isInstanceOf[DenseVector])
+ }
+
 test("VectorAssembler") {
 val df = sqlContext.createDataFrame(Seq(
 (0, 0.0, Vectors.dense(1.0, 2.0), "a", Vectors.sparse(2, Array(1), Array(3.0)), 10L)
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 8a0fdddd2d9b5..705a368192c24 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -121,12 +121,12 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
 >>> df = sc.parallelize([Row(a=1, b=0, c=3)]).toDF()
 >>> vecAssembler = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features")
 >>> vecAssembler.transform(df).head().features
- SparseVector(3, {0: 1.0, 2: 3.0})
+ DenseVector([1.0, 0.0, 3.0])
 >>> vecAssembler.setParams(outputCol="freqs").transform(df).head().freqs
- SparseVector(3, {0: 1.0, 2: 3.0})
+ DenseVector([1.0, 0.0, 3.0])
 >>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"}
 >>> vecAssembler.transform(df, params).head().vector
- SparseVector(2, {1: 1.0})
+ DenseVector([0.0, 1.0])
 """
 _java_class = "org.apache.spark.ml.feature.VectorAssembler"

From 97d1182af63d55abab44521171652c81c56c6af6 Mon Sep 17 00:00:00 2001
From: scwf
Date: Thu, 7 May 2015 16:21:24 -0700
Subject: [PATCH 32/48] [SQL] [MINOR] make star and multialias extend NamedExpression

`Star` and `MultiAlias` are only used in the `analyzer` and will be
substituted after analysis, so, just like `Alias`, they do not need to extend `Attribute`.

Author: scwf

Closes #5928 from scwf/attribute and squashes the following commits:

73a0560 [scwf] star and multialias do not need extend attribute
---
 .../sql/catalyst/analysis/unresolved.scala | 20 +++++--------------
 .../expressions/namedExpressions.scala | 2 +-
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index 3f567e3e8b2a6..eb736ac3290e8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -95,7 +95,7 @@ case class UnresolvedFunction(name: String, children: Seq[Expression]) extends E
 * Represents all of the input attributes to a given relational operator, for example in
 * "SELECT * FROM ...". A [[Star]] gets automatically expanded during analysis.
 */
-trait Star extends Attribute with trees.LeafNode[Expression] {
+trait Star extends NamedExpression with trees.LeafNode[Expression] {
 self: Product =>
 override def name: String = throw new UnresolvedException(this, "name")
@@ -103,13 +103,9 @@ trait Star extends Attribute with trees.LeafNode[Expression] {
 override def dataType: DataType = throw new UnresolvedException(this, "dataType")
 override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
 override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers")
+ override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute")
 override lazy val resolved = false
- override def newInstance(): Star = this
- override def withNullability(newNullability: Boolean): Star = this
- override def withQualifiers(newQualifiers: Seq[String]): Star = this
- override def withName(newName: String): Star = this
-
 // Star gets expanded at runtime so we never evaluate a Star.
 override def eval(input: Row = null): EvaluatedType =
 throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}")
@@ -154,7 +150,7 @@ case class UnresolvedStar(table: Option[String]) extends Star {
 * @param names the names to be associated with each output of computing [[child]].
 */
 case class MultiAlias(child: Expression, names: Seq[String])
- extends Attribute with trees.UnaryNode[Expression] {
+ extends NamedExpression with trees.UnaryNode[Expression] {
 override def name: String = throw new UnresolvedException(this, "name")
@@ -166,15 +162,9 @@ case class MultiAlias(child: Expression, names: Seq[String])
 override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers")
- override lazy val resolved = false
-
- override def newInstance(): MultiAlias = this
+ override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute")
- override def withNullability(newNullability: Boolean): MultiAlias = this
-
- override def withQualifiers(newQualifiers: Seq[String]): MultiAlias = this
-
- override def withName(newName: String): MultiAlias = this
+ override lazy val resolved = false
 override def eval(input: Row = null): EvaluatedType =
 throw new TreeNodeException(this, s"No function to evaluate expression.
type: ${this.nodeName}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 57ace2a14f77c..a9170589f8c6c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -86,7 +86,7 @@ abstract class Attribute extends NamedExpression { def withQualifiers(newQualifiers: Seq[String]): Attribute def withName(newName: String): Attribute - def toAttribute: Attribute = this + override def toAttribute: Attribute = this def newInstance(): Attribute } From ea3077f19c18b5556a3632b36771aeb153746ff5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 7 May 2015 16:22:45 -0700 Subject: [PATCH 33/48] [SPARK-7277] [SQL] Throw exception if the property mapred.reduce.tasks is set to -1 JIRA: https://issues.apache.org/jira/browse/SPARK-7277 As automatically determining the number of reducers is not supported (`mapred.reduce.tasks` is set to `-1`), we should throw exception to users. Author: Liang-Chi Hsieh Closes #5811 from viirya/no_neg_reduce_tasks and squashes the following commits: e518f96 [Liang-Chi Hsieh] Consider other wrong setting values. fd9c817 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into no_neg_reduce_tasks 4ede705 [Liang-Chi Hsieh] Throw exception instead of warning message. 68a1c70 [Liang-Chi Hsieh] Show warning message if mapred.reduce.tasks is set to -1. --- .../org/apache/spark/sql/execution/commands.scala | 10 ++++++++-- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 10 ++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 98df5bef34efa..65687db4e6235 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -84,8 +84,14 @@ case class SetCommand( logWarning( s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " + s"automatically converted to ${SQLConf.SHUFFLE_PARTITIONS} instead.") - sqlContext.setConf(SQLConf.SHUFFLE_PARTITIONS, value) - Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$value")) + if (value.toInt < 1) { + val msg = s"Setting negative ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} for automatically " + + "determining the number of reducers is not supported." + throw new IllegalArgumentException(msg) + } else { + sqlContext.setConf(SQLConf.SHUFFLE_PARTITIONS, value) + Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$value")) + } // Configures a single property. 
case Some((key, Some(value))) =>

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 208cec6a32d4d..77be3b8b206c0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -871,6 +871,16 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
 conf.clear()
 }
+ test("SET commands with illegal or inappropriate argument") {
+ conf.clear()
+ // Setting negative mapred.reduce.tasks to automatically determine
+ // the number of reducers is not supported
+ intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-1"))
+ intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-01"))
+ intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-2"))
+ conf.clear()
+ }
+
 test("apply schema") {
 val schema1 = StructType(
 StructField("f1", IntegerType, false) ::

From 937ba798c56770ec54276b9259e47ae65ee93967 Mon Sep 17 00:00:00 2001
From: Iulian Dragos
Date: Thu, 7 May 2015 16:24:11 -0700
Subject: [PATCH 34/48] [SPARK-5281] [SQL] Registering table on RDD is giving MissingRequirementError

Go through the context classloader when reflecting on user types in ScalaReflection.

Replaced calls to `typeOf` with `typeTag[T].in(mirror)`. The convenience method assumes all types can be found in the classloader that loaded scala-reflect (the primordial classloader). This assumption is not valid in all contexts (sbt console, Eclipse launchers).

Fixed SPARK-5281

Author: Iulian Dragos

Closes #5981 from dragos/issue/mirrors-missing-requirement-error and squashes the following commits:

d103e70 [Iulian Dragos] Go through the context classloader when reflecting on user types in ScalaReflection
---
 .../spark/sql/catalyst/ScalaReflection.scala | 57 ++++++++++++-------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index c52965507c715..6998cc8d9666d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.types._
 */
 object ScalaReflection extends ScalaReflection {
 val universe: scala.reflect.runtime.universe.type = scala.reflect.runtime.universe
+ val mirror: universe.Mirror = universe.runtimeMirror(Thread.currentThread().getContextClassLoader)
 }
 /**
@@ -36,6 +37,9 @@ trait ScalaReflection {
 /** The universe we work in (runtime or macro) */
 val universe: scala.reflect.api.Universe
+ /** The mirror used to access types in the universe */
+ val mirror: universe.Mirror
+
 import universe._
 // The Predef.Map is scala.collection.immutable.Map.
@@ -52,7 +56,19 @@ trait ScalaReflection {
 /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. */
 def schemaFor[T: TypeTag]: Schema =
- ScalaReflectionLock.synchronized { schemaFor(typeOf[T]) }
+ ScalaReflectionLock.synchronized { schemaFor(localTypeOf[T]) }
+
+ /**
+ * Return the Scala Type for `T` in the current classloader mirror.
+ *
+ * Use this method instead of the convenience method `universe.typeOf`, which
+ * assumes that all types can be found in the classloader that loaded scala-reflect classes.
+ * That's not necessarily the case when running using Eclipse launchers or even + * Sbt console or test (without `fork := true`). + * + * @see SPARK-5281 + */ + private def localTypeOf[T: TypeTag]: `Type` = typeTag[T].in(mirror).tpe /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. */ def schemaFor(tpe: `Type`): Schema = ScalaReflectionLock.synchronized { @@ -67,25 +83,25 @@ trait ScalaReflection { val udt = Utils.classForName(className) .getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance() Schema(udt, nullable = true) - case t if t <:< typeOf[Option[_]] => + case t if t <:< localTypeOf[Option[_]] => val TypeRef(_, _, Seq(optType)) = t Schema(schemaFor(optType).dataType, nullable = true) // Need to decide if we actually need a special type here. - case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true) - case t if t <:< typeOf[Array[_]] => + case t if t <:< localTypeOf[Array[Byte]] => Schema(BinaryType, nullable = true) + case t if t <:< localTypeOf[Array[_]] => val TypeRef(_, _, Seq(elementType)) = t val Schema(dataType, nullable) = schemaFor(elementType) Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Seq[_]] => + case t if t <:< localTypeOf[Seq[_]] => val TypeRef(_, _, Seq(elementType)) = t val Schema(dataType, nullable) = schemaFor(elementType) Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Map[_, _]] => + case t if t <:< localTypeOf[Map[_, _]] => val TypeRef(_, _, Seq(keyType, valueType)) = t val Schema(valueDataType, valueNullable) = schemaFor(valueType) Schema(MapType(schemaFor(keyType).dataType, valueDataType, valueContainsNull = valueNullable), nullable = true) - case t if t <:< typeOf[Product] => + case t if t <:< localTypeOf[Product] => val formalTypeArgs = t.typeSymbol.asClass.typeParams val TypeRef(_, _, actualTypeArgs) = t val constructorSymbol = t.member(nme.CONSTRUCTOR) @@ -107,19 +123,20 @@ trait ScalaReflection { schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs)) StructField(p.name.toString, dataType, nullable) }), nullable = true) - case t if t <:< typeOf[String] => Schema(StringType, nullable = true) - case t if t <:< typeOf[java.sql.Timestamp] => Schema(TimestampType, nullable = true) - case t if t <:< typeOf[java.sql.Date] => Schema(DateType, nullable = true) - case t if t <:< typeOf[BigDecimal] => Schema(DecimalType.Unlimited, nullable = true) - case t if t <:< typeOf[java.math.BigDecimal] => Schema(DecimalType.Unlimited, nullable = true) - case t if t <:< typeOf[Decimal] => Schema(DecimalType.Unlimited, nullable = true) - case t if t <:< typeOf[java.lang.Integer] => Schema(IntegerType, nullable = true) - case t if t <:< typeOf[java.lang.Long] => Schema(LongType, nullable = true) - case t if t <:< typeOf[java.lang.Double] => Schema(DoubleType, nullable = true) - case t if t <:< typeOf[java.lang.Float] => Schema(FloatType, nullable = true) - case t if t <:< typeOf[java.lang.Short] => Schema(ShortType, nullable = true) - case t if t <:< typeOf[java.lang.Byte] => Schema(ByteType, nullable = true) - case t if t <:< typeOf[java.lang.Boolean] => Schema(BooleanType, nullable = true) + case t if t <:< localTypeOf[String] => Schema(StringType, nullable = true) + case t if t <:< localTypeOf[java.sql.Timestamp] => Schema(TimestampType, nullable = true) + case t if t <:< localTypeOf[java.sql.Date] => Schema(DateType, nullable = true) + case t if t <:< localTypeOf[BigDecimal] => 
Schema(DecimalType.Unlimited, nullable = true)
+    case t if t <:< localTypeOf[java.math.BigDecimal] =>
+      Schema(DecimalType.Unlimited, nullable = true)
+    case t if t <:< localTypeOf[Decimal] => Schema(DecimalType.Unlimited, nullable = true)
+    case t if t <:< localTypeOf[java.lang.Integer] => Schema(IntegerType, nullable = true)
+    case t if t <:< localTypeOf[java.lang.Long] => Schema(LongType, nullable = true)
+    case t if t <:< localTypeOf[java.lang.Double] => Schema(DoubleType, nullable = true)
+    case t if t <:< localTypeOf[java.lang.Float] => Schema(FloatType, nullable = true)
+    case t if t <:< localTypeOf[java.lang.Short] => Schema(ShortType, nullable = true)
+    case t if t <:< localTypeOf[java.lang.Byte] => Schema(ByteType, nullable = true)
+    case t if t <:< localTypeOf[java.lang.Boolean] => Schema(BooleanType, nullable = true)
     case t if t <:< definitions.IntTpe => Schema(IntegerType, nullable = false)
     case t if t <:< definitions.LongTpe => Schema(LongType, nullable = false)
     case t if t <:< definitions.DoubleTpe => Schema(DoubleType, nullable = false)

From 35f0173b8f67e2e506fc4575be6430cfb66e2238 Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Thu, 7 May 2015 16:26:49 -0700
Subject: [PATCH 35/48] [SPARK-2155] [SQL] [WHEN D THEN E] [ELSE F] add
 CaseKeyWhen for "CASE a WHEN b THEN c * END"

Avoid translating to CaseWhen, which would evaluate the key expression many times.

Author: Wenchen Fan

Closes #5979 from cloud-fan/condition and squashes the following commits:

3ce54e1 [Wenchen Fan] add CaseKeyWhen
---
 .../apache/spark/sql/catalyst/SqlParser.scala |  10 +-
 .../catalyst/analysis/HiveTypeCoercion.scala  |  43 +++---
 .../sql/catalyst/expressions/Expression.scala |   2 +-
 .../sql/catalyst/expressions/predicates.scala | 135 ++++++++++++------
 .../ExpressionEvaluationSuite.scala           |  26 ++++
 .../spark/sql/DataFrameNaFunctions.scala      |   9 +-
 .../org/apache/spark/sql/hive/HiveQl.scala    |  12 +-
 .../sql/hive/execution/SQLQuerySuite.scala    |   7 +
 8 files changed, 159 insertions(+), 85 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index 1d3a2dc0d9bb0..b06bfb2ce8c8e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -296,13 +296,13 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser {
     | LOWER ~ "(" ~> expression <~ ")" ^^ { case exp => Lower(exp) }
     | IF ~ "(" ~> expression ~ ("," ~> expression) ~ ("," ~> expression) <~ ")" ^^
       { case c ~ t ~ f => If(c, t, f) }
-    | CASE ~> expression.? ~ (WHEN ~> expression ~ (THEN ~> expression)).* ~
+    | CASE ~> expression.? ~ rep1(WHEN ~> expression ~ (THEN ~> expression)) ~
       (ELSE ~> expression).?
<~ END ^^ { case casePart ~ altPart ~ elsePart => - val altExprs = altPart.flatMap { case whenExpr ~ thenExpr => - Seq(casePart.fold(whenExpr)(EqualTo(_, whenExpr)), thenExpr) - } - CaseWhen(altExprs ++ elsePart.toList) + val branches = altPart.flatMap { case whenExpr ~ thenExpr => + Seq(whenExpr, thenExpr) + } ++ elsePart + casePart.map(CaseKeyWhen(_, branches)).getOrElse(CaseWhen(branches)) } | (SUBSTR | SUBSTRING) ~ "(" ~> expression ~ ("," ~> expression) <~ ")" ^^ { case s ~ p => Substring(s, p, Literal(Integer.MAX_VALUE)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 873c75c525c3b..168a4e30eab86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -631,31 +631,24 @@ trait HiveTypeCoercion { import HiveTypeCoercion._ def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case cw @ CaseWhen(branches) if !cw.resolved && !branches.exists(!_.resolved) => - val valueTypes = branches.sliding(2, 2).map { - case Seq(_, value) => value.dataType - case Seq(elseVal) => elseVal.dataType - }.toSeq - - logDebug(s"Input values for null casting ${valueTypes.mkString(",")}") - - if (valueTypes.distinct.size > 1) { - val commonType = valueTypes.reduce { (v1, v2) => - findTightestCommonType(v1, v2) - .getOrElse(sys.error( - s"Types in CASE WHEN must be the same or coercible to a common type: $v1 != $v2")) - } - val transformedBranches = branches.sliding(2, 2).map { - case Seq(cond, value) if value.dataType != commonType => - Seq(cond, Cast(value, commonType)) - case Seq(elseVal) if elseVal.dataType != commonType => - Seq(Cast(elseVal, commonType)) - case s => s - }.reduce(_ ++ _) - CaseWhen(transformedBranches) - } else { - // Types match up. Hopefully some other rule fixes whatever is wrong with resolution. - cw + case cw: CaseWhenLike if !cw.resolved && cw.childrenResolved && !cw.valueTypesEqual => + logDebug(s"Input values for null casting ${cw.valueTypes.mkString(",")}") + val commonType = cw.valueTypes.reduce { (v1, v2) => + findTightestCommonType(v1, v2).getOrElse(sys.error( + s"Types in CASE WHEN must be the same or coercible to a common type: $v1 != $v2")) + } + val transformedBranches = cw.branches.sliding(2, 2).map { + case Seq(when, value) if value.dataType != commonType => + Seq(when, Cast(value, commonType)) + case Seq(elseVal) if elseVal.dataType != commonType => + Seq(Cast(elseVal, commonType)) + case s => s + }.reduce(_ ++ _) + cw match { + case _: CaseWhen => + CaseWhen(transformedBranches) + case CaseKeyWhen(key, _) => + CaseKeyWhen(key, transformedBranches) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 4fd1bc4dd642d..0837a3179d897 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -64,7 +64,7 @@ abstract class Expression extends TreeNode[Expression] { * Returns true if all the children of this expression have been resolved to a specific schema * and false if any still contains any unresolved placeholders. 
*/ - def childrenResolved: Boolean = !children.exists(!_.resolved) + def childrenResolved: Boolean = children.forall(_.resolved) /** * Returns a string representation of this expression that does not have developer centric diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 26c38c56c04f5..50b0f3ee5f93f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -353,79 +353,134 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi override def toString: String = s"if ($predicate) $trueValue else $falseValue" } +trait CaseWhenLike extends Expression { + self: Product => + + type EvaluatedType = Any + + // Note that `branches` are considered in consecutive pairs (cond, val), and the optional last + // element is the value for the default catch-all case (if provided). + // Hence, `branches` consists of at least two elements, and can have an odd or even length. + def branches: Seq[Expression] + + @transient lazy val whenList = + branches.sliding(2, 2).collect { case Seq(whenExpr, _) => whenExpr }.toSeq + @transient lazy val thenList = + branches.sliding(2, 2).collect { case Seq(_, thenExpr) => thenExpr }.toSeq + val elseValue = if (branches.length % 2 == 0) None else Option(branches.last) + + // both then and else val should be considered. + def valueTypes: Seq[DataType] = (thenList ++ elseValue).map(_.dataType) + def valueTypesEqual: Boolean = valueTypes.distinct.size <= 1 + + override def dataType: DataType = { + if (!resolved) { + throw new UnresolvedException(this, "cannot resolve due to differing types in some branches") + } + valueTypes.head + } + + override def nullable: Boolean = { + // If no value is nullable and no elseValue is provided, the whole statement defaults to null. + thenList.exists(_.nullable) || (elseValue.map(_.nullable).getOrElse(true)) + } +} + // scalastyle:off /** * Case statements of the form "CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END". * Refer to this link for the corresponding semantics: * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-ConditionalFunctions - * - * The other form of case statements "CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END" gets - * translated to this form at parsing time. Namely, such a statement gets translated to - * "CASE WHEN a=b THEN c [WHEN a=d THEN e]* [ELSE f] END". - * - * Note that `branches` are considered in consecutive pairs (cond, val), and the optional last - * element is the value for the default catch-all case (if provided). Hence, `branches` consists of - * at least two elements, and can have an odd or even length. */ // scalastyle:on -case class CaseWhen(branches: Seq[Expression]) extends Expression { - type EvaluatedType = Any +case class CaseWhen(branches: Seq[Expression]) extends CaseWhenLike { + + // Use private[this] Array to speed up evaluation. 
+ @transient private[this] lazy val branchesArr = branches.toArray override def children: Seq[Expression] = branches - override def dataType: DataType = { - if (!resolved) { - throw new UnresolvedException(this, "cannot resolve due to differing types in some branches") + override lazy val resolved: Boolean = + childrenResolved && + whenList.forall(_.dataType == BooleanType) && + valueTypesEqual + + /** Written in imperative fashion for performance considerations. */ + override def eval(input: Row): Any = { + val len = branchesArr.length + var i = 0 + // If all branches fail and an elseVal is not provided, the whole statement + // defaults to null, according to Hive's semantics. + while (i < len - 1) { + if (branchesArr(i).eval(input) == true) { + return branchesArr(i + 1).eval(input) + } + i += 2 + } + var res: Any = null + if (i == len - 1) { + res = branchesArr(i).eval(input) } - branches(1).dataType + return res } + override def toString: String = { + "CASE" + branches.sliding(2, 2).map { + case Seq(cond, value) => s" WHEN $cond THEN $value" + case Seq(elseValue) => s" ELSE $elseValue" + }.mkString + } +} + +// scalastyle:off +/** + * Case statements of the form "CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END". + * Refer to this link for the corresponding semantics: + * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-ConditionalFunctions + */ +// scalastyle:on +case class CaseKeyWhen(key: Expression, branches: Seq[Expression]) extends CaseWhenLike { + + // Use private[this] Array to speed up evaluation. @transient private[this] lazy val branchesArr = branches.toArray - @transient private[this] lazy val predicates = - branches.sliding(2, 2).collect { case Seq(cond, _) => cond }.toSeq - @transient private[this] lazy val values = - branches.sliding(2, 2).collect { case Seq(_, value) => value }.toSeq - @transient private[this] lazy val elseValue = - if (branches.length % 2 == 0) None else Option(branches.last) - override def nullable: Boolean = { - // If no value is nullable and no elseValue is provided, the whole statement defaults to null. - values.exists(_.nullable) || (elseValue.map(_.nullable).getOrElse(true)) - } + override def children: Seq[Expression] = key +: branches - override lazy val resolved: Boolean = { - if (!childrenResolved) { - false - } else { - val allCondBooleans = predicates.forall(_.dataType == BooleanType) - // both then and else val should be considered. - val dataTypesEqual = (values ++ elseValue).map(_.dataType).distinct.size <= 1 - allCondBooleans && dataTypesEqual - } - } + override lazy val resolved: Boolean = + childrenResolved && valueTypesEqual /** Written in imperative fashion for performance considerations. */ override def eval(input: Row): Any = { + val evaluatedKey = key.eval(input) val len = branchesArr.length var i = 0 // If all branches fail and an elseVal is not provided, the whole statement // defaults to null, according to Hive's semantics. 
- var res: Any = null while (i < len - 1) { - if (branchesArr(i).eval(input) == true) { - res = branchesArr(i + 1).eval(input) - return res + if (equalNullSafe(evaluatedKey, branchesArr(i).eval(input))) { + return branchesArr(i + 1).eval(input) } i += 2 } + var res: Any = null if (i == len - 1) { res = branchesArr(i).eval(input) } - res + return res + } + + private def equalNullSafe(l: Any, r: Any) = { + if (l == null && r == null) { + true + } else if (l == null || r == null) { + false + } else { + l == r + } } override def toString: String = { - "CASE" + branches.sliding(2, 2).map { + s"CASE $key" + branches.sliding(2, 2).map { case Seq(cond, value) => s" WHEN $cond THEN $value" case Seq(elseValue) => s" ELSE $elseValue" }.mkString diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index faaa55aa5e6f1..88d36d153ceea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -850,6 +850,32 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { assert(CaseWhen(Seq(c2, c4_notNull, c3, c5)).nullable === true) } + test("case key when") { + val row = create_row(null, 1, 2, "a", "b", "c") + val c1 = 'a.int.at(0) + val c2 = 'a.int.at(1) + val c3 = 'a.int.at(2) + val c4 = 'a.string.at(3) + val c5 = 'a.string.at(4) + val c6 = 'a.string.at(5) + + val literalNull = Literal.create(null, BooleanType) + val literalInt = Literal(1) + val literalString = Literal("a") + + checkEvaluation(CaseKeyWhen(c1, Seq(c2, c4, c5)), "b", row) + checkEvaluation(CaseKeyWhen(c1, Seq(c2, c4, literalNull, c5, c6)), "b", row) + checkEvaluation(CaseKeyWhen(c2, Seq(literalInt, c4, c5)), "a", row) + checkEvaluation(CaseKeyWhen(c2, Seq(c1, c4, c5)), "b", row) + checkEvaluation(CaseKeyWhen(c4, Seq(literalString, c2, c3)), 1, row) + checkEvaluation(CaseKeyWhen(c4, Seq(c1, c3, c5, c2, Literal(3))), 3, row) + + checkEvaluation(CaseKeyWhen(literalInt, Seq(c2, c4, c5)), "a", row) + checkEvaluation(CaseKeyWhen(literalString, Seq(c5, c2, c4, c3)), 2, row) + checkEvaluation(CaseKeyWhen(literalInt, Seq(c5, c2, c4, c3)), null, row) + checkEvaluation(CaseKeyWhen(literalNull, Seq(c5, c2, c1, c3)), 2, row) + } + test("complex type") { val row = create_row( "^Ba*n", // 0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 481ed4924857e..4a54120ba86f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -357,11 +357,12 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * TODO: This can be optimized to use broadcast join when replacementMap is large. 
*/ private def replaceCol(col: StructField, replacementMap: Map[_, _]): Column = { - val branches: Seq[Expression] = replacementMap.flatMap { case (source, target) => - df.col(col.name).equalTo(lit(source).cast(col.dataType)).expr :: - lit(target).cast(col.dataType).expr :: Nil + val keyExpr = df.col(col.name).expr + def buildExpr(v: Any) = Cast(Literal(v), keyExpr.dataType) + val branches = replacementMap.flatMap { case (source, target) => + Seq(buildExpr(source), buildExpr(target)) }.toSeq - new Column(CaseWhen(branches ++ Seq(df.col(col.name).expr))).as(col.name) + new Column(CaseKeyWhen(keyExpr, branches :+ keyExpr)).as(col.name) } private def convertToDouble(v: Any): Double = v match { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 4e51473979a2a..6176aee25e7a4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -1246,16 +1246,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) => CaseWhen(branches.map(nodeToExpr)) case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) => - val transformed = branches.drop(1).sliding(2, 2).map { - case Seq(condVal, value) => - // FIXME (SPARK-2155): the key will get evaluated for multiple times in CaseWhen's eval(). - // Hence effectful / non-deterministic key expressions are *not* supported at the moment. - // We should consider adding new Expressions to get around this. - Seq(EqualTo(nodeToExpr(branches(0)), nodeToExpr(condVal)), - nodeToExpr(value)) - case Seq(elseVal) => Seq(nodeToExpr(elseVal)) - }.toSeq.reduce(_ ++ _) - CaseWhen(transformed) + val keyExpr = nodeToExpr(branches.head) + CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr)) /* Complex datatype manipulation */ case Token("[", child :: ordinal :: Nil) => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 616352d223c5b..c605f1017547a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -751,4 +751,11 @@ class SQLQuerySuite extends QueryTest { (6, "c", 0, 6) ).map(i => Row(i._1, i._2, i._3, i._4))) } + + test("test case key when") { + (1 to 5).map(i => (i, i.toString)).toDF("k", "v").registerTempTable("t") + checkAnswer( + sql("SELECT CASE k WHEN 2 THEN 22 WHEN 4 THEN 44 ELSE 0 END, v FROM t"), + Row(0, "1") :: Row(22, "2") :: Row(0, "3") :: Row(44, "4") :: Row(0, "5") :: Nil) + } } From 88063c62689135da56ee6678b2e826b88c382732 Mon Sep 17 00:00:00 2001 From: tedyu Date: Thu, 7 May 2015 16:53:59 -0700 Subject: [PATCH 36/48] [SPARK-7450] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() Author: tedyu Closes #5897 from tedyu/master and squashes the following commits: 473bf9d [tedyu] Address Josh's review comments 1719c5b [tedyu] Correct upper bound in for loop b51dcaf [tedyu] Add unit test in BitSetSuite for BitSet#anySet() 83f9f87 [tedyu] Merge branch 'master' of github.com:apache/spark 817e3f9 [tedyu] Replace constant 8 with SIZE_OF_LONG 75a467b [tedyu] Correct offset for UNSAFE.getLong() 855374b [tedyu] Remove second loop since bitSetWidthInBytes is WORD aligned 093b7a4 [tedyu] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() 
63ee050 [tedyu] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() 4ca0ef6 [tedyu] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() 3e9b6919 [tedyu] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() --- .../main/java/org/apache/spark/unsafe/bitset/BitSet.java | 8 ++++++++ .../org/apache/spark/unsafe/bitset/BitSetMethods.java | 7 ++++--- .../java/org/apache/spark/unsafe/bitset/BitSetSuite.java | 7 +++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java index f72e07fce92fd..28e23da108ebe 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java @@ -102,4 +102,12 @@ public boolean isSet(int index) { public int nextSetBit(int fromIndex) { return BitSetMethods.nextSetBit(baseObject, baseOffset, fromIndex, numWords); } + + /** + * Returns {@code true} if any bit is set. + */ + public boolean anySet() { + return BitSetMethods.anySet(baseObject, baseOffset, numWords); + } + } diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index f30626d8f4317..0987191c1c636 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -70,9 +70,10 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { /** * Returns {@code true} if any bit is set. */ - public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { - for (int i = 0; i <= bitSetWidthInBytes; i++) { - if (PlatformDependent.UNSAFE.getByte(baseObject, baseOffset + i) != 0) { + public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInWords) { + long addr = baseOffset; + for (int i = 0; i < bitSetWidthInWords; i++, addr += WORD_SIZE) { + if (PlatformDependent.UNSAFE.getLong(baseObject, addr) != 0) { return true; } } diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java index e3a824e29b768..18393db9f382f 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java @@ -39,6 +39,8 @@ public void basicOps() { for (int i = 0; i < bs.capacity(); i++) { Assert.assertFalse(bs.isSet(i)); } + // another form of asserting that the bit set is empty + Assert.assertFalse(bs.anySet()); // Set every bit and check it. for (int i = 0; i < bs.capacity(); i++) { @@ -52,6 +54,11 @@ public void basicOps() { bs.unset(i); Assert.assertFalse(bs.isSet(i)); } + + // Make sure anySet() can detect any set bit + bs = createBitSet(256); + bs.set(64); + Assert.assertTrue(bs.anySet()); } @Test From 22ab70e06ede65ca865073fe36c859042a920aa3 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Thu, 7 May 2015 17:34:44 -0700 Subject: [PATCH 37/48] [SPARK-7305] [STREAMING] [WEBUI] Make BatchPage show friendly information when jobs are dropped by SparkListener If jobs are dropped by SparkListener, at least we can show the job ids in BatchPage. 
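The row-generation change can be sketched standalone: each Spark job id is paired with the `JobUIData` that the listener may have dropped, and rendering falls back to placeholder cells when it is gone. A minimal, hedged sketch of the pattern (simplified names; not the actual BatchPage code):

```scala
// Simplified stand-in for SparkJobIdWithUIData: keep the id even when the
// listener has evicted the UI data for the job.
case class JobIdWithUIData(jobId: Int, uiData: Option[String])

def renderJobRow(job: JobIdWithUIData): String = job.uiData match {
  case Some(data) => s"Job ${job.jobId}: $data"
  case None       => s"Job ${job.jobId}: -" // data dropped; show the id only
}

// Example: the second job's UI data was dropped by the listener.
Seq(JobIdWithUIData(0, Some("succeeded")), JobIdWithUIData(1, None))
  .map(renderJobRow)
  .foreach(println)
```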
Screenshot:

![b1](https://cloud.githubusercontent.com/assets/1000778/7434968/f19aa784-eff3-11e4-8f86-36a073873574.png)

Author: zsxwing

Closes #5840 from zsxwing/SPARK-7305 and squashes the following commits:

aca0ba6 [zsxwing] Fix the code style
718765e [zsxwing] Make generateNormalJobRow private
8073b03 [zsxwing] Merge branch 'master' into SPARK-7305
83dec11 [zsxwing] Make BatchPage show friendly information when jobs are dropped by SparkListener
---
 .../apache/spark/streaming/ui/BatchPage.scala | 136 ++++++++++++++----
 1 file changed, 106 insertions(+), 30 deletions(-)

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
index 3f1cab69068dc..831f60e870f74 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
@@ -19,7 +19,7 @@ package org.apache.spark.streaming.ui

 import javax.servlet.http.HttpServletRequest

-import scala.xml.{NodeSeq, Node}
+import scala.xml.{NodeSeq, Node, Text}

 import org.apache.commons.lang3.StringEscapeUtils

@@ -28,6 +28,7 @@ import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage}
 import org.apache.spark.streaming.ui.StreamingJobProgressListener.{SparkJobId, OutputOpId}
 import org.apache.spark.ui.jobs.UIData.JobUIData

+private case class SparkJobIdWithUIData(sparkJobId: SparkJobId, jobUIData: Option[JobUIData])

 private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
   private val streamingListener = parent.listener
@@ -44,25 +45,33 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
       Error
   }

+  private def generateJobRow(
+      outputOpId: OutputOpId,
+      outputOpDescription: Seq[Node],
+      formattedOutputOpDuration: String,
+      numSparkJobRowsInOutputOp: Int,
+      isFirstRow: Boolean,
+      sparkJob: SparkJobIdWithUIData): Seq[Node] = {
+    if (sparkJob.jobUIData.isDefined) {
+      generateNormalJobRow(outputOpId, outputOpDescription, formattedOutputOpDuration,
+        numSparkJobRowsInOutputOp, isFirstRow, sparkJob.jobUIData.get)
+    } else {
+      generateDroppedJobRow(outputOpId, outputOpDescription, formattedOutputOpDuration,
+        numSparkJobRowsInOutputOp, isFirstRow, sparkJob.sparkJobId)
+    }
+  }
+
   /**
    * Generate a row for a Spark Job. Because duplicated output op infos need to be collapsed into
    * one cell, we use "rowspan" for the first row of an output op.
*/ - def generateJobRow( + private def generateNormalJobRow( outputOpId: OutputOpId, + outputOpDescription: Seq[Node], formattedOutputOpDuration: String, numSparkJobRowsInOutputOp: Int, isFirstRow: Boolean, sparkJob: JobUIData): Seq[Node] = { - val lastStageInfo = Option(sparkJob.stageIds) - .filter(_.nonEmpty) - .flatMap { ids => sparkListener.stageIdToInfo.get(ids.max) } - val lastStageData = lastStageInfo.flatMap { s => - sparkListener.stageIdToData.get((s.stageId, s.attemptId)) - } - - val lastStageName = lastStageInfo.map(_.name).getOrElse("(Unknown Stage Name)") - val lastStageDescription = lastStageData.flatMap(_.description).getOrElse("") val duration: Option[Long] = { sparkJob.submissionTime.map { start => val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis()) @@ -83,9 +92,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { if (isFirstRow) { {outputOpId.toString} - - {lastStageDescription} - {lastStageName} + {outputOpDescription} {formattedOutputOpDuration} } else { @@ -122,27 +129,96 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { } - private def generateOutputOpIdRow( - outputOpId: OutputOpId, sparkJobs: Seq[JobUIData]): Seq[Node] = { - val sparkjobDurations = sparkJobs.map(sparkJob => { - sparkJob.submissionTime.map { start => - val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis()) - end - start + /** + * If a job is dropped by sparkListener due to exceeding the limitation, we only show the job id + * with "-" cells. + */ + private def generateDroppedJobRow( + outputOpId: OutputOpId, + outputOpDescription: Seq[Node], + formattedOutputOpDuration: String, + numSparkJobRowsInOutputOp: Int, + isFirstRow: Boolean, + jobId: Int): Seq[Node] = { + // In the first row, output op id and its information needs to be shown. In other rows, these + // cells will be taken up due to "rowspan". + // scalastyle:off + val prefixCells = + if (isFirstRow) { + {outputOpId.toString} + {outputOpDescription} + {formattedOutputOpDuration} + } else { + Nil } - }) + // scalastyle:on + + + {prefixCells} + + {jobId.toString} + + + - + + - + + - + + - + + } + + private def generateOutputOpIdRow( + outputOpId: OutputOpId, sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = { + // We don't count the durations of dropped jobs + val sparkJobDurations = sparkJobs.filter(_.jobUIData.nonEmpty).map(_.jobUIData.get). 
+      map(sparkJob => {
+        sparkJob.submissionTime.map { start =>
+          val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis())
+          end - start
+        }
+      })
     val formattedOutputOpDuration =
-      if (sparkjobDurations.exists(_ == None)) {
-        // If any job does not finish, set "formattedOutputOpDuration" to "-"
+      if (sparkJobDurations.isEmpty || sparkJobDurations.exists(_ == None)) {
+        // If no job or any job does not finish, set "formattedOutputOpDuration" to "-"
         "-"
       } else {
-        SparkUIUtils.formatDuration(sparkjobDurations.flatMap(x => x).sum)
+        SparkUIUtils.formatDuration(sparkJobDurations.flatMap(x => x).sum)
       }
-    generateJobRow(outputOpId, formattedOutputOpDuration, sparkJobs.size, true, sparkJobs.head) ++
+
+    val description = generateOutputOpDescription(sparkJobs)
+
+    generateJobRow(
+      outputOpId, description, formattedOutputOpDuration, sparkJobs.size, true, sparkJobs.head) ++
       sparkJobs.tail.map { sparkJob =>
-        generateJobRow(outputOpId, formattedOutputOpDuration, sparkJobs.size, false, sparkJob)
+        generateJobRow(
+          outputOpId, description, formattedOutputOpDuration, sparkJobs.size, false, sparkJob)
       }.flatMap(x => x)
   }

+  private def generateOutputOpDescription(sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = {
+    val lastStageInfo =
+      sparkJobs.flatMap(_.jobUIData).headOption. // Get the first JobUIData
+        flatMap { sparkJob => // For the first job, get the latest Stage info
+          if (sparkJob.stageIds.isEmpty) {
+            None
+          } else {
+            sparkListener.stageIdToInfo.get(sparkJob.stageIds.max)
+          }
+        }
+    val lastStageData = lastStageInfo.flatMap { s =>
+      sparkListener.stageIdToData.get((s.stageId, s.attemptId))
+    }
+
+    val lastStageName = lastStageInfo.map(_.name).getOrElse("(Unknown Stage Name)")
+    val lastStageDescription = lastStageData.flatMap(_.description).getOrElse("")
+
+
+ {lastStageDescription}
+ ++ Text(lastStageName)
+  }
+
   private def failureReasonCell(failureReason: String): Seq[Node] = {
     val isMultiline = failureReason.indexOf('\n') >= 0
     // Display the first line by default
@@ -187,10 +263,10 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
         (outputOpId, outputOpIdAndSparkJobIds.map(_.sparkJobId).sorted)
       }
     sparkListener.synchronized {
-      val outputOpIdWithJobs: Seq[(OutputOpId, Seq[JobUIData])] =
+      val outputOpIdWithJobs: Seq[(OutputOpId, Seq[SparkJobIdWithUIData])] =
         outputOpIdToSparkJobIds.map { case (outputOpId, sparkJobIds) =>
-          // Filter out spark Job ids that don't exist in sparkListener
-          (outputOpId, sparkJobIds.flatMap(getJobData))
+          (outputOpId,
+            sparkJobIds.map(sparkJobId => SparkJobIdWithUIData(sparkJobId, getJobData(sparkJobId))))
         }

     {
       outputOpIdWithJobs.map {
-        case (outputOpId, jobs) => generateOutputOpIdRow(outputOpId, jobs)
+        case (outputOpId, sparkJobIds) => generateOutputOpIdRow(outputOpId, sparkJobIds)
       }
     }

From cd1d4110cfffb413ab585cf1cc8f1264243cb393 Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Thu, 7 May 2015 19:36:24 -0700
Subject: [PATCH 38/48] [SPARK-6908] [SQL] Use isolated Hive client

This PR switches Spark SQL's Hive support to use the isolated hive client interface introduced by #5851, instead of directly interacting with the client. By using this isolated client we can now allow users to dynamically configure the version of Hive that they are connecting to by setting `spark.sql.hive.metastore.version` without the need to recompile.
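As a hedged usage sketch of what this enables (the configuration keys are the ones named in this message; the version values and initialization order are illustrative assumptions, not taken from this patch):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

val sc = new SparkContext(new SparkConf().setAppName("metastore-version-demo"))
val hiveContext = new HiveContext(sc)

// Assumed example values: talk to an older metastore while executing with the
// built-in Hive 0.13.1, fetching matching client jars from maven on demand.
hiveContext.setConf("spark.sql.hive.metastore.version", "0.12.0")
hiveContext.setConf("spark.sql.hive.metastore.jars", "maven")

// The isolated metastore client is created lazily, on first metadata access.
hiveContext.sql("SHOW TABLES").collect().foreach(println)
```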
This also greatly reduces the surface area for our interaction with the hive libraries, hopefully making it easier to support other versions in the future. Jars for the desired hive version can be configured using `spark.sql.hive.metastore.jars`, which accepts the following options: - a colon-separated list of jar files or directories for hive and hadoop. - `builtin` - attempt to discover the jars that were used to load Spark SQL and use those. This option is only valid when using the execution version of Hive. - `maven` - download the correct version of hive on demand from maven. By default, `builtin` is used for Hive 13. This PR also removes the test step for building against Hive 12, as this will no longer be required to talk to Hive 12 metastores. However, the full removal of the Shim is deferred until a later PR. Remaining TODOs: - Remove the Hive Shims and inline code for Hive 13. - Several HiveCompatibility tests are not yet passing. - `nullformatCTAS` - As detailed below, we now are handling CTAS parsing ourselves instead of hacking into the Hive semantic analyzer. However, we currently only handle the common cases and not things like CTAS where the null format is specified. - `combine1` now leaks state about compression somehow, breaking all subsequent tests. As such we currently add it to the blacklist - `part_inherit_tbl_props` and `part_inherit_tbl_props_with_star` do not work anymore. We are correctly propagating the information - "load_dyn_part14.*" - These tests pass when run on their own, but fail when run with all other tests. It seems our `RESET` mechanism may not be as robust as it used to be? Other required changes: - `CreateTableAsSelect` no longer carries parts of the HiveQL AST with it through the query execution pipeline. Instead, we parse CTAS during the HiveQL conversion and construct a `HiveTable`. The full parsing here is not yet complete as detailed above in the remaining TODOs. Since the operator is Hive specific, it is moved to the hive package. - `Command` is simplified to be a trait that simply acts as a marker for a LogicalPlan that should be eagerly evaluated. Author: Michael Armbrust Closes #5876 from marmbrus/useIsolatedClient and squashes the following commits: 258d000 [Michael Armbrust] really really correct path handling e56fd4a [Michael Armbrust] getAbsolutePath 5a259f5 [Michael Armbrust] fix typos 81bb366 [Michael Armbrust] comments from vanzin 5f3945e [Michael Armbrust] Merge remote-tracking branch 'origin/master' into useIsolatedClient 4b5cd41 [Michael Armbrust] yin's comments f5de7de [Michael Armbrust] cleanup 11e9c72 [Michael Armbrust] better coverage in versions suite 7e8f010 [Michael Armbrust] better error messages and jar handling e7b3941 [Michael Armbrust] more permisive checking for function registration da91ba7 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into useIsolatedClient 5fe5894 [Michael Armbrust] fix serialization suite 81711c4 [Michael Armbrust] Initial support for running without maven 1d8ae44 [Michael Armbrust] fix final tests? 
1c50813 [Michael Armbrust] more comments a3bee70 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into useIsolatedClient a6f5df1 [Michael Armbrust] style ab07f7e [Michael Armbrust] WIP 4d8bf02 [Michael Armbrust] Remove hive 12 compilation 8843a25 [Michael Armbrust] [SPARK-6908] [SQL] Use isolated Hive client --- dev/run-tests | 23 - project/MimaExcludes.scala | 2 + project/SparkBuild.scala | 9 +- .../plans/logical/basicOperators.scala | 16 +- .../sql/catalyst/plans/logical/commands.scala | 8 +- .../spark/sql/catalyst/SqlParserSuite.scala | 6 +- .../org/apache/spark/sql/DataFrame.scala | 1 - .../org/apache/spark/sql/SQLContext.scala | 11 +- .../apache/spark/sql/execution/commands.scala | 4 +- .../org/apache/spark/sql/sources/ddl.scala | 16 +- .../hive/thriftserver/SparkSQLCLIDriver.scala | 26 +- .../sql/hive/thriftserver/SparkSQLEnv.scala | 9 +- .../execution/HiveCompatibilitySuite.scala | 12 +- .../apache/spark/sql/hive/HiveContext.scala | 283 ++++++------ .../spark/sql/hive/HiveMetastoreCatalog.scala | 415 ++++++------------ .../org/apache/spark/sql/hive/HiveQl.scala | 126 +++++- .../apache/spark/sql/hive/TableReader.scala | 11 +- .../sql/hive/client/ClientInterface.scala | 41 +- .../spark/sql/hive/client/ClientWrapper.scala | 99 +++-- .../hive/client/IsolatedClientLoader.scala | 23 +- .../sql/hive/client/ReflectionMagic.scala | 8 + .../hive/execution/CreateTableAsSelect.scala | 33 +- .../hive/execution/InsertIntoHiveTable.scala | 33 +- .../spark/sql/hive/execution/commands.scala | 13 + .../apache/spark/sql/hive/test/TestHive.scala | 72 ++- sql/hive/src/test/resources/log4j.properties | 2 +- .../spark/sql/hive/ErrorPositionSuite.scala | 22 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 26 +- .../spark/sql/hive/SerializationSuite.scala | 6 +- .../spark/sql/hive/client/VersionsSuite.scala | 78 +++- .../hive/execution/HiveComparisonTest.scala | 2 + .../sql/hive/execution/HiveQuerySuite.scala | 2 +- .../sql/hive/execution/PruningSuite.scala | 15 +- 33 files changed, 782 insertions(+), 671 deletions(-) diff --git a/dev/run-tests b/dev/run-tests index 05c63bce4d40d..ef587a1a5988c 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -142,29 +142,6 @@ CURRENT_BLOCK=$BLOCK_BUILD { HIVE_BUILD_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver" - HIVE_12_BUILD_ARGS="$HIVE_BUILD_ARGS -Phive-0.12.0" - - # First build with Hive 0.12.0 to ensure patches do not break the Hive 0.12.0 build - echo "[info] Compile with Hive 0.12.0" - [ -d "lib_managed" ] && rm -rf lib_managed - echo "[info] Building Spark with these arguments: $HIVE_12_BUILD_ARGS" - - if [ "${AMPLAB_JENKINS_BUILD_TOOL}" == "maven" ]; then - build/mvn $HIVE_12_BUILD_ARGS clean package -DskipTests - else - # NOTE: echo "q" is needed because sbt on encountering a build file with failure - # (either resolution or compilation) prompts the user for input either q, r, etc - # to quit or retry. This echo is there to make it not block. - # NOTE: Do not quote $BUILD_MVN_PROFILE_ARGS or else it will be interpreted as a - # single argument! - # QUESTION: Why doesn't 'yes "q"' work? - # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work? 
- echo -e "q\n" \ - | build/sbt $HIVE_12_BUILD_ARGS clean hive/compile hive-thriftserver/compile \ - | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including" - fi - - # Then build with default Hive version (0.13.1) because tests are based on this version echo "[info] Compile with Hive 0.13.1" [ -d "lib_managed" ] && rm -rf lib_managed echo "[info] Building Spark with these arguments: $HIVE_BUILD_ARGS" diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index bf343d4b7e40b..cfe387faec14b 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -89,6 +89,8 @@ object MimaExcludes { ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.Vector.numActives") ) ++ Seq( + // Execution should never be included as its always internal. + MimaBuild.excludeSparkPackage("sql.execution"), // This `protected[sql]` method was removed in 1.3.1 ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.sql.SQLContext.checkAnalysis"), diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b4431c7ee05b6..026855f8f6a5a 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -193,6 +193,7 @@ object SparkBuild extends PomBuild { * Usage: `build/sbt sparkShell` */ val sparkShell = taskKey[Unit]("start a spark-shell.") + val sparkSql = taskKey[Unit]("starts the spark sql CLI.") enable(Seq( connectInput in run := true, @@ -203,6 +204,12 @@ object SparkBuild extends PomBuild { sparkShell := { (runMain in Compile).toTask(" org.apache.spark.repl.Main -usejavacp").value + }, + + javaOptions in Compile += "-Dspark.master=local", + + sparkSql := { + (runMain in Compile).toTask(" org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver").value } ))(assembly) @@ -497,7 +504,7 @@ object TestSettings { // Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes // launched by the tests have access to the correct test-time classpath. envVars in Test ++= Map( - "SPARK_DIST_CLASSPATH" -> + "SPARK_DIST_CLASSPATH" -> (fullClasspath in Test).value.files.map(_.getAbsolutePath).mkString(":").stripSuffix(":"), "JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))), javaOptions in Test += "-Dspark.test.home=" + sparkHome, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala index ba0abb2df596c..0f349f9d11415 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala @@ -149,16 +149,6 @@ case class InsertIntoTable( } } -case class CreateTableAsSelect[T]( - databaseName: Option[String], - tableName: String, - child: LogicalPlan, - allowExisting: Boolean, - desc: Option[T] = None) extends UnaryNode { - override def output: Seq[Attribute] = Seq.empty[Attribute] - override lazy val resolved: Boolean = databaseName != None && childrenResolved -} - /** * A container for holding named common table expressions (CTEs) and a query plan. * This operator will be removed during analysis and the relations will be substituted into child. 
@@ -184,10 +174,10 @@ case class WriteToFile( } /** - * @param order The ordering expressions - * @param global True means global sorting apply for entire data set, + * @param order The ordering expressions + * @param global True means global sorting apply for entire data set, * False means sorting only apply within the partition. - * @param child Child logical plan + * @param child Child logical plan */ case class Sort( order: Seq[SortOrder], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala index 45905f8ef98c5..246f4d7e34d3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala @@ -21,9 +21,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute /** * A logical node that represents a non-query command to be executed by the system. For example, - * commands can be used by parsers to represent DDL operations. + * commands can be used by parsers to represent DDL operations. Commands, unlike queries, are + * eagerly executed. */ -abstract class Command extends LeafNode { - self: Product => - def output: Seq[Attribute] = Seq.empty -} +trait Command diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala index a652c70560990..890ea2a84b82e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala @@ -17,11 +17,15 @@ package org.apache.spark.sql.catalyst +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.logical.Command import org.scalatest.FunSuite -private[sql] case class TestCommand(cmd: String) extends Command +private[sql] case class TestCommand(cmd: String) extends LogicalPlan with Command { + override def output: Seq[Attribute] = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty +} private[sql] class SuperLongKeywordTestParser extends AbstractSparkSQLParser { protected val EXECUTE = Keyword("THISISASUPERLONGKEYWORDTEST") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 79fbf50300d4d..7947042c14299 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -143,7 +143,6 @@ class DataFrame private[sql]( // happen right away to let these side effects take place eagerly. 
case _: Command | _: InsertIntoTable | - _: CreateTableAsSelect[_] | _: CreateTableUsingAsSelect | _: WriteToFile => LogicalRDD(queryExecution.analyzed.output, queryExecution.toRdd)(sqlContext) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 0563430a6fdc7..0ac0936f0f592 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -70,7 +70,7 @@ import org.apache.spark.{Partition, SparkContext} * spark-sql> SELECT * FROM src LIMIT 1; * *-- Exception will be thrown and switch to dialect - *-- "sql" (for SQLContext) or + *-- "sql" (for SQLContext) or *-- "hiveql" (for HiveContext) * }}} */ @@ -107,7 +107,7 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * @return Spark SQL configuration */ - protected[sql] def conf = tlSession.get().conf + protected[sql] def conf = currentSession().conf /** * Set Spark SQL configuration properties. @@ -1197,13 +1197,17 @@ class SQLContext(@transient val sparkContext: SparkContext) |${stringOrError(executedPlan)} """.stripMargin.trim - override def toString: String = + override def toString: String = { + def output = + analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}").mkString(", ") + // TODO previously will output RDD details by run (${stringOrError(toRdd.toDebugString)}) // however, the `toRdd` will cause the real execution, which is not what we want. // We need to think about how to avoid the side effect. s"""== Parsed Logical Plan == |${stringOrError(logical)} |== Analyzed Logical Plan == + |${stringOrError(output)} |${stringOrError(analyzed)} |== Optimized Logical Plan == |${stringOrError(optimizedPlan)} @@ -1212,6 +1216,7 @@ class SQLContext(@transient val sparkContext: SparkContext) |Code Generation: ${stringOrError(executedPlan.codegenEnabled)} |== RDD == """.stripMargin.trim + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 65687db4e6235..388a8184e4cfe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -32,9 +32,11 @@ import org.apache.spark.sql.{DataFrame, SQLConf, SQLContext} * A logical command that is executed for its side-effects. `RunnableCommand`s are * wrapped in `ExecutedCommand` during execution. 
*/ -trait RunnableCommand extends logical.Command { +private[sql] trait RunnableCommand extends LogicalPlan with logical.Command { self: Product => + override def output: Seq[Attribute] = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty def run(sqlContext: SQLContext): Seq[Row] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index 1abf3aa51cb25..06c64f2bdd59e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -269,8 +269,10 @@ private[sql] case class ResolvedDataSource(provider: Class[_], relation: BaseRel */ private[sql] case class DescribeCommand( table: LogicalPlan, - isExtended: Boolean) extends Command { - override val output = Seq( + isExtended: Boolean) extends LogicalPlan with Command { + + override def children: Seq[LogicalPlan] = Seq.empty + override val output: Seq[Attribute] = Seq( // Column names are based on Hive. AttributeReference("col_name", StringType, nullable = false, new MetadataBuilder().putString("comment", "name of the column").build())(), @@ -292,7 +294,11 @@ private[sql] case class CreateTableUsing( temporary: Boolean, options: Map[String, String], allowExisting: Boolean, - managedIfNoPath: Boolean) extends Command + managedIfNoPath: Boolean) extends LogicalPlan with Command { + + override def output: Seq[Attribute] = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty +} /** * A node used to support CTAS statements and saveAsTable for the data source API. @@ -318,7 +324,7 @@ private[sql] case class CreateTempTableUsing( provider: String, options: Map[String, String]) extends RunnableCommand { - def run(sqlContext: SQLContext): Seq[Row] = { + override def run(sqlContext: SQLContext): Seq[Row] = { val resolved = ResolvedDataSource(sqlContext, userSpecifiedSchema, provider, options) sqlContext.registerDataFrameAsTable( DataFrame(sqlContext, LogicalRelation(resolved.relation)), tableName) @@ -333,7 +339,7 @@ private[sql] case class CreateTempTableUsingAsSelect( options: Map[String, String], query: LogicalPlan) extends RunnableCommand { - def run(sqlContext: SQLContext): Seq[Row] = { + override def run(sqlContext: SQLContext): Seq[Row] = { val df = DataFrame(sqlContext, query) val resolved = ResolvedDataSource(sqlContext, provider, mode, options, df) sqlContext.registerDataFrameAsTable( diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index b7b6925aa87f7..deb1008c468bf 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -37,7 +37,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.thrift.transport.TSocket import org.apache.spark.Logging -import org.apache.spark.sql.hive.HiveShim +import org.apache.spark.sql.hive.{HiveContext, HiveShim} import org.apache.spark.util.Utils private[hive] object SparkSQLCLIDriver { @@ -74,7 +74,12 @@ private[hive] object SparkSQLCLIDriver { System.exit(1) } - val sessionState = new CliSessionState(new HiveConf(classOf[SessionState])) + val cliConf = new HiveConf(classOf[SessionState]) + // Override the location of the metastore since this is only used for local 
execution. + HiveContext.newTemporaryConfiguration().foreach { + case (key, value) => cliConf.set(key, value) + } + val sessionState = new CliSessionState(cliConf) sessionState.in = System.in try { @@ -91,10 +96,14 @@ private[hive] object SparkSQLCLIDriver { // Set all properties specified via command line. val conf: HiveConf = sessionState.getConf - sessionState.cmdProperties.entrySet().foreach { item: java.util.Map.Entry[Object, Object] => - conf.set(item.getKey.asInstanceOf[String], item.getValue.asInstanceOf[String]) - sessionState.getOverriddenConfigurations.put( - item.getKey.asInstanceOf[String], item.getValue.asInstanceOf[String]) + sessionState.cmdProperties.entrySet().foreach { item => + val key = item.getKey.asInstanceOf[String] + val value = item.getValue.asInstanceOf[String] + // We do not propagate metastore options to the execution copy of hive. + if (key != "javax.jdo.option.ConnectionURL") { + conf.set(key, value) + sessionState.getOverriddenConfigurations.put(key, value) + } } SessionState.start(sessionState) @@ -138,8 +147,9 @@ private[hive] object SparkSQLCLIDriver { case e: UnsupportedEncodingException => System.exit(3) } - // use the specified database if specified - cli.processSelectDatabase(sessionState); + if (sessionState.database != null) { + SparkSQLEnv.hiveContext.runSqlHive(s"USE ${sessionState.database}") + } // Execute -i init files (always in silent mode) cli.processInitFiles(sessionState) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala index 97b46a01ba5b4..7c0c505e2d61e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.thriftserver +import java.io.PrintStream + import scala.collection.JavaConversions._ import org.apache.spark.scheduler.StatsReportListener @@ -39,7 +41,6 @@ private[hive] object SparkSQLEnv extends Logging { sparkConf .setAppName(s"SparkSQL::${Utils.localHostName()}") - .set("spark.sql.hive.version", HiveShim.version) .set( "spark.serializer", maybeSerializer.getOrElse("org.apache.spark.serializer.KryoSerializer")) @@ -51,6 +52,12 @@ private[hive] object SparkSQLEnv extends Logging { sparkContext.addSparkListener(new StatsReportListener()) hiveContext = new HiveContext(sparkContext) + hiveContext.metadataHive.setOut(new PrintStream(System.out, true, "UTF-8")) + hiveContext.metadataHive.setInfo(new PrintStream(System.err, true, "UTF-8")) + hiveContext.metadataHive.setError(new PrintStream(System.err, true, "UTF-8")) + + hiveContext.setConf("spark.sql.hive.version", HiveShim.version) + if (log.isDebugEnabled) { hiveContext.hiveconf.getAllProperties.toSeq.sorted.foreach { case (k, v) => logDebug(s"HiveConf var: $k=$v") diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 5e411c2fdba9d..b6245a57074c8 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -240,7 +240,17 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { 
// It has a bug and it has been fixed by // https://issues.apache.org/jira/browse/HIVE-7673 (in Hive 0.14 and trunk). - "input46" + "input46", + + // These tests were broken by the hive client isolation PR. + "part_inherit_tbl_props", + "part_inherit_tbl_props_with_star", + + "nullformatCTAS", // SPARK-7411: need to finish CTAS parser + + // The isolated classloader seemed to make some of our test reset mechanisms less robust. + "combine1", // This test changes compression settings in a way that breaks all subsequent tests. + "load_dyn_part14.*" // These work alone but fail when run with other tests... ) ++ HiveShim.compatibilityBlackList /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index f25723e53f07c..538c6c7f0a200 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -17,8 +17,9 @@ package org.apache.spark.sql.hive -import java.io.{BufferedReader, InputStreamReader, PrintStream} +import java.io.{BufferedReader, File, InputStreamReader, PrintStream} import java.sql.Timestamp +import java.util.{ArrayList => JArrayList} import org.apache.hadoop.hive.ql.parse.VariableSubstitution import org.apache.spark.sql.catalyst.Dialect @@ -35,15 +36,19 @@ import org.apache.hadoop.hive.ql.processors._ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable} -import org.apache.spark.SparkContext +import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.annotation.Experimental +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateSubQueries, OverrideCatalog, OverrideFunctionRegistry} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.{ExecutedCommand, ExtractPythonUdfs, QueryExecutionException, SetCommand} +import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.hive.execution.{DescribeHiveTableCommand, HiveNativeCommand} import org.apache.spark.sql.sources.{DDLParser, DataSourceStrategy} import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + /** * This is the HiveQL Dialect, this dialect is strongly bind with HiveContext @@ -61,6 +66,8 @@ private[hive] class HiveQLDialect extends Dialect { class HiveContext(sc: SparkContext) extends SQLContext(sc) { self => + import HiveContext._ + /** * When true, enables an experimental feature where metastore tables that use the parquet SerDe * are automatically converted to use the Spark SQL parquet table scan, instead of the Hive @@ -93,9 +100,118 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { protected[sql] def convertCTAS: Boolean = getConf("spark.sql.hive.convertCTAS", "false").toBoolean + /** + * The version of the hive client that will be used to communicate with the metastore. Note that + * this does not necessarily need to be the same version of Hive that is used internally by + * Spark SQL for execution. + */ + protected[hive] def hiveMetastoreVersion: String = + getConf(HIVE_METASTORE_VERSION, hiveExecutionVersion) + + /** + * The location of the jars that should be used to instantiate the HiveMetastoreClient. This + * property can be one of three options: + * - a classpath in the standard format for both hive and hadoop. 
+ * - builtin - attempt to discover the jars that were used to load Spark SQL and use those. This
+ * option is only valid when using the execution version of Hive.
+ * - maven - download the correct version of hive on demand from maven.
+ */
+  protected[hive] def hiveMetastoreJars: String =
+    getConf(HIVE_METASTORE_JARS, "builtin")
+
   @transient
   protected[sql] lazy val substitutor = new VariableSubstitution()

+  /**
+   * The copy of the hive client that is used for execution. Currently this must always be
+   * Hive 13 as this is the version of Hive that is packaged with Spark SQL. This copy of the
+   * client is used for execution related tasks like registering temporary functions or ensuring
+   * that the ThreadLocal SessionState is correctly populated. This copy of Hive is *not* used
+   * for storing persistent metadata, and only points to a dummy metastore in a temporary directory.
+   */
+  @transient
+  protected[hive] lazy val executionHive: ClientWrapper = {
+    logInfo(s"Initializing execution hive, version $hiveExecutionVersion")
+    new ClientWrapper(
+      version = IsolatedClientLoader.hiveVersion(hiveExecutionVersion),
+      config = newTemporaryConfiguration())
+  }
+  SessionState.setCurrentSessionState(executionHive.state)
+
+  /**
+   * The copy of the Hive client that is used to retrieve metadata from the Hive MetaStore.
+   * The version of the Hive client that is used here must match the metastore that is configured
+   * in the hive-site.xml file.
+   */
+  @transient
+  protected[hive] lazy val metadataHive: ClientInterface = {
+    val metaVersion = IsolatedClientLoader.hiveVersion(hiveMetastoreVersion)
+
+    // We instantiate a HiveConf here to read in the hive-site.xml file and then pass the options
+    // into the isolated client loader
+    val metadataConf = new HiveConf()
+    // `configure` goes second to override other settings.
+    val allConfig = metadataConf.iterator.map(e => e.getKey -> e.getValue).toMap ++ configure
+
+    val isolatedLoader = if (hiveMetastoreJars == "builtin") {
+      if (hiveExecutionVersion != hiveMetastoreVersion) {
+        throw new IllegalArgumentException(
+          "Builtin jars can only be used when hive execution version == hive metastore version. " +
+          s"Execution: ${hiveExecutionVersion} != Metastore: ${hiveMetastoreVersion}. " +
+          "Specify a valid path to the correct hive jars using $HIVE_METASTORE_JARS " +
+          s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
+      }
+      val jars = getClass.getClassLoader match {
+        case urlClassLoader: java.net.URLClassLoader => urlClassLoader.getURLs
+        case other =>
+          throw new IllegalArgumentException(
+            "Unable to locate hive jars to connect to metastore " +
+            s"using classloader ${other.getClass.getName}. " +
+            "Please set spark.sql.hive.metastore.jars")
+      }
+
+      logInfo(
+        s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using Spark classes.")
+      new IsolatedClientLoader(
+        version = metaVersion,
+        execJars = jars.toSeq,
+        config = allConfig,
+        isolationOn = true)
+    } else if (hiveMetastoreJars == "maven") {
+      // TODO: Support for loading the jars from an already downloaded location.
+      logInfo(
+        s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using maven.")
+      IsolatedClientLoader.forVersion(hiveMetastoreVersion, allConfig)
+    } else {
+      // Convert to files and expand any directories.
+ val jars = + hiveMetastoreJars + .split(File.pathSeparator) + .flatMap { + case path if new File(path).getName() == "*" => + val files = new File(path).getParentFile().listFiles() + if (files == null) { + logWarning(s"Hive jar path '$path' does not exist.") + Nil + } else { + files.filter(_.getName().toLowerCase().endsWith(".jar")) + } + case path => + new File(path) :: Nil + } + .map(_.toURI.toURL) + + logInfo( + s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using $jars") + new IsolatedClientLoader( + version = metaVersion, + execJars = jars.toSeq, + config = allConfig, + isolationOn = true) + } + isolatedLoader.client + } + protected[sql] override def parseSql(sql: String): LogicalPlan = { super.parseSql(substitutor.substitute(hiveconf, sql)) } @@ -178,15 +294,10 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { // recorded in the Hive metastore. // This logic is based on org.apache.hadoop.hive.ql.exec.StatsTask.aggregateStats(). if (newTotalSize > 0 && newTotalSize != oldTotalSize) { - tableParameters.put(HiveShim.getStatsSetupConstTotalSize, newTotalSize.toString) - val hiveTTable = relation.hiveQlTable.getTTable - hiveTTable.setParameters(tableParameters) - val tableFullName = - relation.hiveQlTable.getDbName + "." + relation.hiveQlTable.getTableName - - catalog.synchronized { - catalog.client.alterTable(tableFullName, new Table(hiveTTable)) - } + catalog.client.alterTable( + relation.table.copy( + properties = relation.table.properties + + (HiveShim.getStatsSetupConstTotalSize -> newTotalSize.toString))) } case otherRelation => throw new UnsupportedOperationException( @@ -194,47 +305,19 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } } - // Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur. - @transient - protected lazy val outputBuffer = new java.io.OutputStream { - var pos: Int = 0 - var buffer = new Array[Int](10240) - def write(i: Int): Unit = { - buffer(pos) = i - pos = (pos + 1) % buffer.size - } - - override def toString: String = { - val (end, start) = buffer.splitAt(pos) - val input = new java.io.InputStream { - val iterator = (start ++ end).iterator - - def read(): Int = if (iterator.hasNext) iterator.next() else -1 - } - val reader = new BufferedReader(new InputStreamReader(input)) - val stringBuilder = new StringBuilder - var line = reader.readLine() - while(line != null) { - stringBuilder.append(line) - stringBuilder.append("\n") - line = reader.readLine() - } - stringBuilder.toString() - } - } - - protected[hive] def sessionState = tlSession.get().asInstanceOf[this.SQLSession].sessionState - protected[hive] def hiveconf = tlSession.get().asInstanceOf[this.SQLSession].hiveconf override def setConf(key: String, value: String): Unit = { super.setConf(key, value) - runSqlHive(s"SET $key=$value") + hiveconf.set(key, value) + executionHive.runSqlHive(s"SET $key=$value") + metadataHive.runSqlHive(s"SET $key=$value") } /* A catalyst metadata catalog that points to the Hive Metastore. */ @transient - override protected[sql] lazy val catalog = new HiveMetastoreCatalog(this) with OverrideCatalog + override protected[sql] lazy val catalog = + new HiveMetastoreCatalog(metadataHive, this) with OverrideCatalog // Note that HiveUDFs will be overridden by functions registered in this context. 
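Read in isolation, the directory expansion at the top of this hunk amounts to the following self-contained helper (a sketch; expandJarPaths is a hypothetical name, not in this patch):

    import java.io.File
    import java.net.URL

    // Splits on File.pathSeparator; a trailing "*" component means
    // "every .jar file in the enclosing directory".
    def expandJarPaths(metastoreJars: String): Seq[URL] =
      metastoreJars
        .split(File.pathSeparator)
        .toSeq
        .flatMap {
          case path if new File(path).getName == "*" =>
            val files = new File(path).getParentFile.listFiles()
            if (files == null) Nil
            else files.filter(_.getName.toLowerCase.endsWith(".jar")).toSeq
          case path => Seq(new File(path))
        }
        .map(_.toURI.toURL)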
@transient @@ -261,16 +344,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { new this.SQLSession() } + /** Overridden by child classes that need to set configuration before the client init. */ + protected def configure(): Map[String, String] = Map.empty + protected[hive] class SQLSession extends super.SQLSession { protected[sql] override lazy val conf: SQLConf = new SQLConf { override def dialect: String = getConf(SQLConf.DIALECT, "hiveql") } - protected[hive] lazy val hiveconf: HiveConf = { - setConf(sessionState.getConf.getAllProperties) - sessionState.getConf - } - /** * SQLConf and HiveConf contracts: * @@ -285,78 +366,12 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { state = new SessionState(new HiveConf(classOf[SessionState])) SessionState.start(state) } - if (state.out == null) { - state.out = new PrintStream(outputBuffer, true, "UTF-8") - } - if (state.err == null) { - state.err = new PrintStream(outputBuffer, true, "UTF-8") - } state } - } - - /** - * Runs the specified SQL query using Hive. - */ - protected[sql] def runSqlHive(sql: String): Seq[String] = { - val maxResults = 100000 - val results = runHive(sql, maxResults) - // It is very confusing when you only get back some of the results... - if (results.size == maxResults) sys.error("RESULTS POSSIBLY TRUNCATED") - results - } - - /** - * Execute the command using Hive and return the results as a sequence. Each element - * in the sequence is one row. - */ - protected def runHive(cmd: String, maxRows: Int = 1000): Seq[String] = synchronized { - try { - val cmd_trimmed: String = cmd.trim() - val tokens: Array[String] = cmd_trimmed.split("\\s+") - val cmd_1: String = cmd_trimmed.substring(tokens(0).length()).trim() - val proc: CommandProcessor = HiveShim.getCommandProcessor(Array(tokens(0)), hiveconf) - - // Makes sure the session represented by the `sessionState` field is activated. This implies - // Spark SQL Hive support uses a single `SessionState` for all Hive operations and breaks - // session isolation under multi-user scenarios (i.e. HiveThriftServer2). - // TODO Fix session isolation - if (SessionState.get() != sessionState) { - SessionState.start(sessionState) - } - proc match { - case driver: Driver => - val results = HiveShim.createDriverResultsArray - val response: CommandProcessorResponse = driver.run(cmd) - // Throw an exception if there is an error in query processing. 
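Subclasses get one hook into client construction: the configure() method added in this hunk, whose result is merged in after the settings read from hive-site.xml (so later entries win). For illustration, a hypothetical subclass:

    import org.apache.spark.SparkContext
    import org.apache.spark.sql.hive.HiveContext

    class WarehouseOverridingContext(sc: SparkContext) extends HiveContext(sc) {
      // Merged into the metastore client's configuration before it is initialized.
      protected override def configure(): Map[String, String] =
        Map("hive.metastore.warehouse.dir" -> "/tmp/demo-warehouse")
    }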
- if (response.getResponseCode != 0) { - driver.close() - throw new QueryExecutionException(response.getErrorMessage) - } - driver.setMaxRows(maxRows) - driver.getResults(results) - driver.close() - HiveShim.processResults(results) - case _ => - if (sessionState.out != null) { - sessionState.out.println(tokens(0) + " " + cmd_1) - } - Seq(proc.run(cmd_1).getResponseCode.toString) - } - } catch { - case e: Exception => - logError( - s""" - |====================== - |HIVE FAILURE OUTPUT - |====================== - |${outputBuffer.toString} - |====================== - |END HIVE FAILURE OUTPUT - |====================== - """.stripMargin) - throw e + protected[hive] lazy val hiveconf: HiveConf = { + setConf(sessionState.getConf.getAllProperties) + sessionState.getConf } } @@ -391,17 +406,23 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { ) } + protected[hive] def runSqlHive(sql: String): Seq[String] = { + if (sql.toLowerCase.contains("create temporary function")) { + executionHive.runSqlHive(sql) + } else if (sql.trim.toLowerCase.startsWith("set")) { + metadataHive.runSqlHive(sql) + executionHive.runSqlHive(sql) + } else { + metadataHive.runSqlHive(sql) + } + } + @transient override protected[sql] val planner = hivePlanner /** Extends QueryExecution with hive specific features. */ protected[sql] class QueryExecution(logicalPlan: LogicalPlan) extends super.QueryExecution(logicalPlan) { - // Like what we do in runHive, makes sure the session represented by the - // `sessionState` field is activated. - if (SessionState.get() != sessionState) { - SessionState.start(sessionState) - } /** * Returns the result as a hive compatible sequence of strings. For native commands, the @@ -439,7 +460,21 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } -private object HiveContext { +private[hive] object HiveContext { + /** The version of hive used internally by Spark SQL. */ + val hiveExecutionVersion: String = "0.13.1" + + val HIVE_METASTORE_VERSION: String = "spark.sql.hive.metastore.version" + val HIVE_METASTORE_JARS: String = "spark.sql.hive.metastore.jars" + + /** Constructs a configuration for hive, where the metastore is located in a temp directory. 
*/ + def newTemporaryConfiguration(): Map[String, String] = { + val tempDir = Utils.createTempDir() + val localMetastore = new File(tempDir, "metastore").getAbsolutePath + Map( + "javax.jdo.option.ConnectionURL" -> s"jdbc:derby:;databaseName=$localMetastore;create=true") + } + protected val primitiveTypes = Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType, ShortType, DateType, TimestampType, BinaryType) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 4d222cf88e5e8..8fcdf3d0ab119 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -22,6 +22,8 @@ import java.util.{List => JList} import com.google.common.base.Objects import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} + +import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition => TPartition, Table => TTable} import org.apache.hadoop.hive.metastore.{TableType, Warehouse} import org.apache.hadoop.hive.ql.metadata._ @@ -32,6 +34,7 @@ import org.apache.hadoop.hive.serde2.{Deserializer, SerDeException} import org.apache.hadoop.util.ReflectionUtils import org.apache.spark.Logging +import org.apache.spark.sql.hive.client.IsolatedClientLoader import org.apache.spark.sql.{SaveMode, AnalysisException, SQLContext} import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NoSuchTableException, Catalog, OverrideCatalog} import org.apache.spark.sql.catalyst.expressions._ @@ -39,6 +42,7 @@ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.parquet.{ParquetRelation2, Partition => ParquetPartition, PartitionSpec} import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, DDLParser, LogicalRelation, ResolvedDataSource} import org.apache.spark.sql.types._ @@ -47,11 +51,10 @@ import org.apache.spark.util.Utils /* Implicit conversions */ import scala.collection.JavaConversions._ -private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging { - import org.apache.spark.sql.hive.HiveMetastoreTypes._ +private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: HiveContext) + extends Catalog with Logging { - /** Connection to hive metastore. Usages should lock on `this`. */ - protected[hive] val client = Hive.get(hive.hiveconf) + import org.apache.spark.sql.hive.HiveMetastoreTypes._ /** Usages should lock on `this`. 
*/ protected[hive] lazy val hiveWarehouse = new Warehouse(hive.hiveconf) @@ -67,14 +70,12 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with val cacheLoader = new CacheLoader[QualifiedTableName, LogicalPlan]() { override def load(in: QualifiedTableName): LogicalPlan = { logDebug(s"Creating new cached data source for $in") - val table = HiveMetastoreCatalog.this.synchronized { - client.getTable(in.database, in.name) - } + val table = client.getTable(in.database, in.name) def schemaStringFromParts: Option[String] = { - Option(table.getProperty("spark.sql.sources.schema.numParts")).map { numParts => + table.properties.get("spark.sql.sources.schema.numParts").map { numParts => val parts = (0 until numParts.toInt).map { index => - val part = table.getProperty(s"spark.sql.sources.schema.part.${index}") + val part = table.properties.get(s"spark.sql.sources.schema.part.${index}").orNull if (part == null) { throw new AnalysisException( s"Could not read schema from the metastore because it is corrupted " + @@ -92,20 +93,20 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with // After SPARK-6024, we removed this flag. // Although we are not using spark.sql.sources.schema any more, we need to still support. val schemaString = - Option(table.getProperty("spark.sql.sources.schema")).orElse(schemaStringFromParts) + table.properties.get("spark.sql.sources.schema").orElse(schemaStringFromParts) val userSpecifiedSchema = schemaString.map(s => DataType.fromJson(s).asInstanceOf[StructType]) // It does not appear that the ql client for the metastore has a way to enumerate all the // SerDe properties directly... - val options = table.getTTable.getSd.getSerdeInfo.getParameters.toMap + val options = table.serdeProperties val resolvedRelation = ResolvedDataSource( hive, userSpecifiedSchema, - table.getProperty("spark.sql.sources.provider"), + table.properties("spark.sql.sources.provider"), options) LogicalRelation(resolvedRelation.relation) @@ -144,49 +145,53 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with options: Map[String, String], isExternal: Boolean): Unit = { val (dbName, tblName) = processDatabaseAndTableName("default", tableName) - val tbl = new Table(dbName, tblName) - - tbl.setProperty("spark.sql.sources.provider", provider) + val tableProperties = new scala.collection.mutable.HashMap[String, String] + tableProperties.put("spark.sql.sources.provider", provider) if (userSpecifiedSchema.isDefined) { val threshold = hive.conf.schemaStringLengthThreshold val schemaJsonString = userSpecifiedSchema.get.json // Split the JSON string. 
val parts = schemaJsonString.grouped(threshold).toSeq - tbl.setProperty("spark.sql.sources.schema.numParts", parts.size.toString) + tableProperties.put("spark.sql.sources.schema.numParts", parts.size.toString) parts.zipWithIndex.foreach { case (part, index) => - tbl.setProperty(s"spark.sql.sources.schema.part.${index}", part) + tableProperties.put(s"spark.sql.sources.schema.part.${index}", part) } } - options.foreach { case (key, value) => tbl.setSerdeParam(key, value) } - if (isExternal) { - tbl.setProperty("EXTERNAL", "TRUE") - tbl.setTableType(TableType.EXTERNAL_TABLE) + val tableType = if (isExternal) { + tableProperties.put("EXTERNAL", "TRUE") + ExternalTable } else { - tbl.setProperty("EXTERNAL", "FALSE") - tbl.setTableType(TableType.MANAGED_TABLE) - } - - // create the table - synchronized { - client.createTable(tbl, false) - } + tableProperties.put("EXTERNAL", "FALSE") + ManagedTable + } + + client.createTable( + HiveTable( + specifiedDatabase = Option(dbName), + name = tblName, + schema = Seq.empty, + partitionColumns = Seq.empty, + tableType = tableType, + properties = tableProperties.toMap, + serdeProperties = options)) } - def hiveDefaultTableFilePath(tableName: String): String = synchronized { - val currentDatabase = client.getDatabase(hive.sessionState.getCurrentDatabase) - - hiveWarehouse.getTablePath(currentDatabase, tableName).toString + def hiveDefaultTableFilePath(tableName: String): String = { + // Code based on: hiveWarehouse.getTablePath(currentDatabase, tableName) + new Path( + new Path(client.getDatabase(client.currentDatabase).location), + tableName.toLowerCase).toString } - def tableExists(tableIdentifier: Seq[String]): Boolean = synchronized { + def tableExists(tableIdentifier: Seq[String]): Boolean = { val tableIdent = processTableIdentifier(tableIdentifier) val databaseName = tableIdent .lift(tableIdent.size - 2) - .getOrElse(hive.sessionState.getCurrentDatabase) + .getOrElse(client.currentDatabase) val tblName = tableIdent.last - client.getTable(databaseName, tblName, false) != null + client.getTableOption(databaseName, tblName).isDefined } def lookupRelation( @@ -194,18 +199,11 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with alias: Option[String]): LogicalPlan = { val tableIdent = processTableIdentifier(tableIdentifier) val databaseName = tableIdent.lift(tableIdent.size - 2).getOrElse( - hive.sessionState.getCurrentDatabase) + client.currentDatabase) val tblName = tableIdent.last - val table = try { - synchronized { - client.getTable(databaseName, tblName) - } - } catch { - case te: org.apache.hadoop.hive.ql.metadata.InvalidTableException => - throw new NoSuchTableException - } + val table = client.getTable(databaseName, tblName) - if (table.getProperty("spark.sql.sources.provider") != null) { + if (table.properties.get("spark.sql.sources.provider").isDefined) { val dataSourceTable = cachedDataSourceTables(QualifiedTableName(databaseName, tblName).toLowerCase) // Then, if alias is specified, wrap the table with a Subquery using the alias. @@ -215,22 +213,16 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Subquery(tableIdent.last, dataSourceTable)) withAlias - } else if (table.isView) { - // if the unresolved relation is from hive view - // parse the text into logic node. 
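The part-splitting convention used above round-trips as follows (a sketch with hypothetical helper names; the threshold corresponds to hive.conf.schemaStringLengthThreshold):

    // Writing: one JSON schema string becomes several bounded-size properties.
    def toSchemaProperties(schemaJson: String, threshold: Int): Map[String, String] = {
      val parts = schemaJson.grouped(threshold).toSeq
      Map("spark.sql.sources.schema.numParts" -> parts.size.toString) ++
        parts.zipWithIndex.map { case (part, index) =>
          s"spark.sql.sources.schema.part.$index" -> part
        }
    }

    // Reading: reassemble in order, failing loudly on a missing part.
    def fromSchemaProperties(props: Map[String, String]): Option[String] =
      props.get("spark.sql.sources.schema.numParts").map { numParts =>
        (0 until numParts.toInt).map { index =>
          props.getOrElse(s"spark.sql.sources.schema.part.$index",
            sys.error(s"missing schema part $index"))
        }.mkString
      }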
-      HiveQl.createPlanForView(table, alias)
+    } else if (table.tableType == VirtualView) {
+      val viewText = table.viewText.getOrElse(sys.error("Invalid view without text."))
+      alias match {
+        // because hive uses things like `_c0` to build the expanded text
+        // currently we cannot support views created with "create view v1(c1) as ..."
+        case None => Subquery(table.name, HiveQl.createPlan(viewText))
+        case Some(aliasText) => Subquery(aliasText, HiveQl.createPlan(viewText))
+      }
     } else {
-      val partitions: Seq[Partition] =
-        if (table.isPartitioned) {
-          synchronized {
-            HiveShim.getAllPartitionsOf(client, table).toSeq
-          }
-        } else {
-          Nil
-        }
-
-      MetastoreRelation(databaseName, tblName, alias)(
-        table.getTTable, partitions.map(part => part.getTPartition))(hive)
+      MetastoreRelation(databaseName, tblName, alias)(table)(hive)
     }
   }
@@ -318,178 +310,10 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with
     result.newInstance()
   }
 
-  override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = synchronized {
-    val dbName = if (!caseSensitive) {
-      if (databaseName.isDefined) Some(databaseName.get.toLowerCase) else None
-    } else {
-      databaseName
-    }
-    val db = dbName.getOrElse(hive.sessionState.getCurrentDatabase)
-
-    client.getAllTables(db).map(tableName => (tableName, false))
-  }
-
-  /**
-   * Create table with specified database, table name, table description and schema
-   * @param databaseName Database Name
-   * @param tableName Table Name
-   * @param schema Schema of the new table, if not specified, will use the schema
-   *               specified in crtTbl
-   * @param allowExisting if true, ignore AlreadyExistsException
-   * @param desc CreateTableDesc object which contains the SerDe info. Currently
-   *             we support most of the features except the bucket.
-   */
-  def createTable(
-      databaseName: String,
-      tableName: String,
-      schema: Seq[Attribute],
-      allowExisting: Boolean = false,
-      desc: Option[CreateTableDesc] = None) {
-    val hconf = hive.hiveconf
-
-    val (dbName, tblName) = processDatabaseAndTableName(databaseName, tableName)
-    val tbl = new Table(dbName, tblName)
-
-    val crtTbl: CreateTableDesc = desc.getOrElse(null)
-
-    // We should respect the passed in schema, unless it's not set
-    val hiveSchema: JList[FieldSchema] = if (schema == null || schema.isEmpty) {
-      crtTbl.getCols
-    } else {
-      schema.map(attr => new FieldSchema(attr.name, toMetastoreType(attr.dataType), null))
-    }
-    tbl.setFields(hiveSchema)
-
-    // Most of code are similar with the DDLTask.createTable() of Hive,
-    if (crtTbl != null && crtTbl.getTblProps() != null) {
-      tbl.getTTable().getParameters().putAll(crtTbl.getTblProps())
-    }
-
-    if (crtTbl != null && crtTbl.getPartCols() != null) {
-      tbl.setPartCols(crtTbl.getPartCols())
-    }
-
-    if (crtTbl != null && crtTbl.getStorageHandler() != null) {
-      tbl.setProperty(
-        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE,
-        crtTbl.getStorageHandler())
-    }
-
-    /*
-     * We use LazySimpleSerDe by default.
-     *
-     * If the user didn't specify a SerDe, and any of the columns are not simple
-     * types, we will have to use DynamicSerDe instead.
- */ - if (crtTbl == null || crtTbl.getSerName() == null) { - val storageHandler = tbl.getStorageHandler() - if (storageHandler == null) { - logInfo(s"Default to LazySimpleSerDe for table $dbName.$tblName") - tbl.setSerializationLib(classOf[LazySimpleSerDe].getName()) - - import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - import org.apache.hadoop.io.Text - import org.apache.hadoop.mapred.TextInputFormat - - tbl.setInputFormatClass(classOf[TextInputFormat]) - tbl.setOutputFormatClass(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]]) - tbl.setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - } else { - val serDeClassName = storageHandler.getSerDeClass().getName() - logInfo(s"Use StorageHandler-supplied $serDeClassName for table $dbName.$tblName") - tbl.setSerializationLib(serDeClassName) - } - } else { - // let's validate that the serde exists - val serdeName = crtTbl.getSerName() - try { - val d = ReflectionUtils.newInstance(hconf.getClassByName(serdeName), hconf) - if (d != null) { - logDebug("Found class for $serdeName") - } - } catch { - case e: SerDeException => throw new HiveException("Cannot validate serde: " + serdeName, e) - } - tbl.setSerializationLib(serdeName) - } - - if (crtTbl != null && crtTbl.getFieldDelim() != null) { - tbl.setSerdeParam(serdeConstants.FIELD_DELIM, crtTbl.getFieldDelim()) - tbl.setSerdeParam(serdeConstants.SERIALIZATION_FORMAT, crtTbl.getFieldDelim()) - } - if (crtTbl != null && crtTbl.getFieldEscape() != null) { - tbl.setSerdeParam(serdeConstants.ESCAPE_CHAR, crtTbl.getFieldEscape()) - } - - if (crtTbl != null && crtTbl.getCollItemDelim() != null) { - tbl.setSerdeParam(serdeConstants.COLLECTION_DELIM, crtTbl.getCollItemDelim()) - } - if (crtTbl != null && crtTbl.getMapKeyDelim() != null) { - tbl.setSerdeParam(serdeConstants.MAPKEY_DELIM, crtTbl.getMapKeyDelim()) - } - if (crtTbl != null && crtTbl.getLineDelim() != null) { - tbl.setSerdeParam(serdeConstants.LINE_DELIM, crtTbl.getLineDelim()) - } - HiveShim.setTblNullFormat(crtTbl, tbl) - - if (crtTbl != null && crtTbl.getSerdeProps() != null) { - val iter = crtTbl.getSerdeProps().entrySet().iterator() - while (iter.hasNext()) { - val m = iter.next() - tbl.setSerdeParam(m.getKey(), m.getValue()) - } - } - - if (crtTbl != null && crtTbl.getComment() != null) { - tbl.setProperty("comment", crtTbl.getComment()) - } - - if (crtTbl != null && crtTbl.getLocation() != null) { - HiveShim.setLocation(tbl, crtTbl) - } - - if (crtTbl != null && crtTbl.getSkewedColNames() != null) { - tbl.setSkewedColNames(crtTbl.getSkewedColNames()) - } - if (crtTbl != null && crtTbl.getSkewedColValues() != null) { - tbl.setSkewedColValues(crtTbl.getSkewedColValues()) - } - - if (crtTbl != null) { - tbl.setStoredAsSubDirectories(crtTbl.isStoredAsSubDirectories()) - tbl.setInputFormatClass(crtTbl.getInputFormat()) - tbl.setOutputFormatClass(crtTbl.getOutputFormat()) - } - - tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName()) - tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName()) - - if (crtTbl != null && crtTbl.isExternal()) { - tbl.setProperty("EXTERNAL", "TRUE") - tbl.setTableType(TableType.EXTERNAL_TABLE) - } - - // set owner - try { - tbl.setOwner(hive.hiveconf.getUser) - } catch { - case e: IOException => throw new HiveException("Unable to get current user", e) - } - - // set create time - tbl.setCreateTime((System.currentTimeMillis() / 1000).asInstanceOf[Int]) - - // TODO add bucket support - // TODO set more info if Hive upgrade + override 
def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = { + val db = databaseName.getOrElse(client.currentDatabase) - // create the table - synchronized { - try client.createTable(tbl, allowExisting) catch { - case e: org.apache.hadoop.hive.metastore.api.AlreadyExistsException - if allowExisting => // Do nothing - case e: Throwable => throw e - } - } + client.listTables(db).map(tableName => (tableName, false)) } protected def processDatabaseAndTableName( @@ -598,42 +422,11 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with // Wait until children are resolved. case p: LogicalPlan if !p.childrenResolved => p - // TODO extra is in type of ASTNode which means the logical plan is not resolved - // Need to think about how to implement the CreateTableAsSelect.resolved - case CreateTableAsSelect(db, tableName, child, allowExisting, Some(extra: ASTNode)) => - val (dbName, tblName) = processDatabaseAndTableName(db, tableName) - val databaseName = dbName.getOrElse(hive.sessionState.getCurrentDatabase) - - // Get the CreateTableDesc from Hive SemanticAnalyzer - val desc: Option[CreateTableDesc] = if (tableExists(Seq(databaseName, tblName))) { - None - } else { - val sa = new SemanticAnalyzer(hive.hiveconf) { - override def analyzeInternal(ast: ASTNode) { - // A hack to intercept the SemanticAnalyzer.analyzeInternal, - // to ignore the SELECT clause of the CTAS - val method = classOf[SemanticAnalyzer].getDeclaredMethod( - "analyzeCreateTable", classOf[ASTNode], classOf[QB]) - method.setAccessible(true) - method.invoke(this, ast, this.getQB) - } - } - - sa.analyze(extra, new Context(hive.hiveconf)) - Some(sa.getQB().getTableDesc) - } - - // Check if the query specifies file format or storage handler. - val hasStorageSpec = desc match { - case Some(crtTbl) => - crtTbl != null && (crtTbl.getSerName != null || crtTbl.getStorageHandler != null) - case None => false - } - - if (hive.convertCTAS && !hasStorageSpec) { + case CreateTableAsSelect(desc, child, allowExisting) => + if (hive.convertCTAS && !desc.serde.isDefined) { // Do the conversion when spark.sql.hive.convertCTAS is true and the query // does not specify any storage format (file format and storage handler). 
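Concretely, the branch above behaves like this (illustrative; assumes a constructed HiveContext named hiveContext):

    // With the pre-existing flag enabled, a CTAS that names no SerDe is planned
    // against the default data source rather than as a Hive table:
    hiveContext.setConf("spark.sql.hive.convertCTAS", "true")
    hiveContext.sql("CREATE TABLE ctas_demo AS SELECT 1 AS id")

    // ...while qualifying the table name with a database is rejected:
    // hiveContext.sql("CREATE TABLE somedb.ctas_demo AS SELECT 1 AS id")  // AnalysisException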
- if (dbName.isDefined) { + if (desc.specifiedDatabase.isDefined) { throw new AnalysisException( "Cannot specify database name in a CTAS statement " + "when spark.sql.hive.convertCTAS is set to true.") @@ -641,7 +434,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTableUsingAsSelect( - tblName, + desc.name, hive.conf.defaultDataSourceName, temporary = false, mode, @@ -650,19 +443,19 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with ) } else { execution.CreateTableAsSelect( - databaseName, - tableName, + desc.copy( + specifiedDatabase = Option(desc.specifiedDatabase.getOrElse(client.currentDatabase))), child, - allowExisting, - desc) + allowExisting) } case p: LogicalPlan if p.resolved => p - case p @ CreateTableAsSelect(db, tableName, child, allowExisting, None) => - val (dbName, tblName) = processDatabaseAndTableName(db, tableName) + case p @ CreateTableAsSelect(desc, child, allowExisting) => + val (dbName, tblName) = processDatabaseAndTableName(desc.database, desc.name) + if (hive.convertCTAS) { - if (dbName.isDefined) { + if (desc.specifiedDatabase.isDefined) { throw new AnalysisException( "Cannot specify database name in a CTAS statement " + "when spark.sql.hive.convertCTAS is set to true.") @@ -678,13 +471,10 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with child ) } else { - val databaseName = dbName.getOrElse(hive.sessionState.getCurrentDatabase) execution.CreateTableAsSelect( - databaseName, - tableName, + desc, child, - allowExisting, - None) + allowExisting) } } } @@ -767,7 +557,7 @@ private[hive] case class InsertIntoHiveTable( private[hive] case class MetastoreRelation (databaseName: String, tableName: String, alias: Option[String]) - (val table: TTable, val partitions: Seq[TPartition]) + (val table: HiveTable) (@transient sqlContext: SQLContext) extends LeafNode with MultiInstanceRelation { @@ -786,16 +576,63 @@ private[hive] case class MetastoreRelation Objects.hashCode(databaseName, tableName, alias, output) } - // TODO: Can we use org.apache.hadoop.hive.ql.metadata.Table as the type of table and - // use org.apache.hadoop.hive.ql.metadata.Partition as the type of elements of partitions. - // Right now, using org.apache.hadoop.hive.ql.metadata.Table and - // org.apache.hadoop.hive.ql.metadata.Partition will cause a NotSerializableException - // which indicates the SerDe we used is not Serializable. + @transient val hiveQlTable: Table = { + // We start by constructing an API table as Hive performs several important transformations + // internally when converting an API table to a QL table. 
+ val tTable = new org.apache.hadoop.hive.metastore.api.Table() + tTable.setTableName(table.name) + tTable.setDbName(table.database) + + val tableParameters = new java.util.HashMap[String, String]() + tTable.setParameters(tableParameters) + table.properties.foreach { case (k, v) => tableParameters.put(k, v) } + + tTable.setTableType(table.tableType.name) + + val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor() + tTable.setSd(sd) + sd.setCols(table.schema.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) + tTable.setPartitionKeys( + table.partitionColumns.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) + + table.location.foreach(sd.setLocation) + table.inputFormat.foreach(sd.setInputFormat) + table.outputFormat.foreach(sd.setOutputFormat) + + val serdeInfo = new org.apache.hadoop.hive.metastore.api.SerDeInfo + sd.setSerdeInfo(serdeInfo) + table.serde.foreach(serdeInfo.setSerializationLib) + val serdeParameters = new java.util.HashMap[String, String]() + serdeInfo.setParameters(serdeParameters) + table.serdeProperties.foreach { case (k, v) => serdeParameters.put(k, v) } + + new Table(tTable) + } + + @transient val hiveQlPartitions: Seq[Partition] = table.getAllPartitions.map { p => + val tPartition = new org.apache.hadoop.hive.metastore.api.Partition + tPartition.setDbName(databaseName) + tPartition.setTableName(tableName) + tPartition.setValues(p.values) + + val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor() + tPartition.setSd(sd) + sd.setCols(table.schema.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) + + sd.setLocation(p.storage.location) + sd.setInputFormat(p.storage.inputFormat) + sd.setOutputFormat(p.storage.outputFormat) + + val serdeInfo = new org.apache.hadoop.hive.metastore.api.SerDeInfo + sd.setSerdeInfo(serdeInfo) + serdeInfo.setSerializationLib(p.storage.serde) - @transient val hiveQlTable: Table = new Table(table) + val serdeParameters = new java.util.HashMap[String, String]() + serdeInfo.setParameters(serdeParameters) + table.serdeProperties.foreach { case (k, v) => serdeParameters.put(k, v) } + p.storage.serdeProperties.foreach { case (k, v) => serdeParameters.put(k, v) } - @transient val hiveQlPartitions: Seq[Partition] = partitions.map { p => - new Partition(hiveQlTable, p) + new Partition(hiveQlTable, tPartition) } @transient override lazy val statistics: Statistics = Statistics( @@ -865,7 +702,7 @@ private[hive] case class MetastoreRelation val columnOrdinals = AttributeMap(attributes.zipWithIndex) override def newInstance(): MetastoreRelation = { - MetastoreRelation(databaseName, tableName, alias)(table, partitions)(sqlContext) + MetastoreRelation(databaseName, tableName, alias)(table)(sqlContext) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 6176aee25e7a4..f30b196734c40 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.execution.ExplainCommand import org.apache.spark.sql.sources.DescribeCommand +import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.hive.execution.{HiveNativeCommand, DropTable, AnalyzeTable, HiveScriptIOSchema} import org.apache.spark.sql.types._ import org.apache.spark.util.random.RandomSampler @@ -50,7 +51,19 @@ import 
scala.collection.JavaConversions._ * back for Hive to execute natively. Will be replaced with a native command that contains the * cmd string. */ -private[hive] case object NativePlaceholder extends Command +private[hive] case object NativePlaceholder extends LogicalPlan { + override def children: Seq[LogicalPlan] = Seq.empty + override def output: Seq[Attribute] = Seq.empty +} + +case class CreateTableAsSelect( + tableDesc: HiveTable, + child: LogicalPlan, + allowExisting: Boolean) extends UnaryNode with Command { + + override def output: Seq[Attribute] = Seq.empty[Attribute] + override lazy val resolved: Boolean = tableDesc.specifiedDatabase.isDefined && childrenResolved +} /** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */ private[hive] object HiveQl { @@ -78,16 +91,16 @@ private[hive] object HiveQl { "TOK_ALTERVIEW_DROPPARTS", "TOK_ALTERVIEW_PROPERTIES", "TOK_ALTERVIEW_RENAME", - + "TOK_CREATEDATABASE", "TOK_CREATEFUNCTION", "TOK_CREATEINDEX", "TOK_CREATEROLE", "TOK_CREATEVIEW", - + "TOK_DESCDATABASE", "TOK_DESCFUNCTION", - + "TOK_DROPDATABASE", "TOK_DROPFUNCTION", "TOK_DROPINDEX", @@ -95,22 +108,22 @@ private[hive] object HiveQl { "TOK_DROPTABLE_PROPERTIES", "TOK_DROPVIEW", "TOK_DROPVIEW_PROPERTIES", - + "TOK_EXPORT", - + "TOK_GRANT", "TOK_GRANT_ROLE", - + "TOK_IMPORT", - + "TOK_LOAD", - + "TOK_LOCKTABLE", - + "TOK_MSCK", - + "TOK_REVOKE", - + "TOK_SHOW_COMPACTIONS", "TOK_SHOW_CREATETABLE", "TOK_SHOW_GRANT", @@ -127,9 +140,9 @@ private[hive] object HiveQl { "TOK_SHOWINDEXES", "TOK_SHOWLOCKS", "TOK_SHOWPARTITIONS", - + "TOK_SWITCHDATABASE", - + "TOK_UNLOCKTABLE" ) @@ -259,6 +272,7 @@ private[hive] object HiveQl { case otherMessage => throw new AnalysisException(otherMessage) } + case e: MatchError => throw e case e: Exception => throw new AnalysisException(e.getMessage) case e: NotImplementedError => @@ -272,14 +286,6 @@ private[hive] object HiveQl { } } - /** Creates LogicalPlan for a given VIEW */ - def createPlanForView(view: Table, alias: Option[String]): Subquery = alias match { - // because hive use things like `_c0` to build the expanded text - // currently we cannot support view from "create view v1(c1) as ..." - case None => Subquery(view.getTableName, createPlan(view.getViewExpandedText)) - case Some(aliasText) => Subquery(aliasText, createPlan(view.getViewExpandedText)) - } - def parseDdl(ddl: String): Seq[Attribute] = { val tree = try { @@ -453,6 +459,14 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C (keys, bitmasks) } + protected def getProperties(node: Node): Seq[(String, String)] = node match { + case Token("TOK_TABLEPROPLIST", list) => + list.map { + case Token("TOK_TABLEPROPERTY", Token(key, Nil) :: Token(value, Nil) :: Nil) => + (unquoteString(key) -> unquoteString(value)) + } + } + protected def nodeToPlan(node: Node): LogicalPlan = node match { // Special drop table that also uncaches. 
case Token("TOK_DROPTABLE", @@ -562,7 +576,62 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C children) val (db, tableName) = extractDbNameTableName(tableNameParts) - CreateTableAsSelect(db, tableName, nodeToPlan(query), allowExisting != None, Some(node)) + var tableDesc = + HiveTable( + specifiedDatabase = db, + name = tableName, + schema = Seq.empty, + partitionColumns = Seq.empty, + properties = Map.empty, + serdeProperties = Map.empty, + tableType = ManagedTable, + location = None, + inputFormat = None, + outputFormat = None, + serde = None) + + // TODO: Handle all the cases here... + children.foreach { + case Token("TOK_TBLRCFILE", Nil) => + import org.apache.hadoop.hive.ql.io.{RCFileInputFormat, RCFileOutputFormat} + tableDesc = tableDesc.copy( + outputFormat = Option(classOf[RCFileOutputFormat].getName), + inputFormat = Option(classOf[RCFileInputFormat[_, _]].getName)) + + if (tableDesc.serde.isEmpty) { + tableDesc = tableDesc.copy( + serde = Option("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) + } + case Token("TOK_TBLORCFILE", Nil) => + tableDesc = tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"), + serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + + case Token("TOK_TBLPARQUETFILE", Nil) => + tableDesc = tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), + serde = Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + + case Token("TOK_TABLESERIALIZER", + Token("TOK_SERDENAME", Token(serdeName, Nil) :: otherProps) :: Nil) => + tableDesc = tableDesc.copy(serde = Option(unquoteString(serdeName))) + + otherProps match { + case Token("TOK_TABLEPROPERTIES", list :: Nil) :: Nil => + tableDesc = tableDesc.copy( + serdeProperties = tableDesc.serdeProperties ++ getProperties(list)) + case Nil => + } + + case Token("TOK_TABLEPROPERTIES", list :: Nil) => + tableDesc = tableDesc.copy(properties = tableDesc.properties ++ getProperties(list)) + + case _ => + } + + CreateTableAsSelect(tableDesc, nodeToPlan(query), allowExisting != None) // If its not a "CREATE TABLE AS" like above then just pass it back to hive as a native command. 
case Token("TOK_CREATETABLE", _) => NativePlaceholder @@ -759,7 +828,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token("TOK_CUBE_GROUPBY", children) => Cube(children.map(nodeToExpr), withLateralView, selectExpressions) case _ => sys.error("Expect WITH CUBE") - }), + }), Some(Project(selectExpressions, withLateralView))).flatten.head } @@ -1077,6 +1146,15 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C } protected val escapedIdentifier = "`([^`]+)`".r + protected val doubleQuotedString = "\"([^\"]+)\"".r + protected val singleQuotedString = "'([^']+)'".r + + protected def unquoteString(str: String) = str match { + case singleQuotedString(s) => s + case doubleQuotedString(s) => s + case other => other + } + /** Strips backticks from ident if present */ protected def cleanIdentifier(ident: String): String = ident match { case escapedIdentifier(i) => i diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index e556c74ffb015..b69312f0f8717 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -32,6 +32,7 @@ import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf} import org.apache.spark.SerializableWritable import org.apache.spark.broadcast.Broadcast +import org.apache.spark.Logging import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types.DateUtils @@ -57,7 +58,7 @@ class HadoopTableReader( @transient relation: MetastoreRelation, @transient sc: HiveContext, @transient hiveExtraConf: HiveConf) - extends TableReader { + extends TableReader with Logging { // Hadoop honors "mapred.map.tasks" as hint, but will ignore when mapred.job.tracker is "local". // https://hadoop.apache.org/docs/r1.0.4/mapred-default.html @@ -78,7 +79,7 @@ class HadoopTableReader( makeRDDForTable( hiveTable, Class.forName( - relation.tableDesc.getSerdeClassName, true, sc.sessionState.getConf.getClassLoader) + relation.tableDesc.getSerdeClassName, true, Utils.getSparkClassLoader) .asInstanceOf[Class[Deserializer]], filterOpt = None) @@ -145,7 +146,7 @@ class HadoopTableReader( partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]], filterOpt: Option[PathFilter]): RDD[Row] = { - + // SPARK-5068:get FileStatus and do the filtering locally when the path is not exists def verifyPartitionPath( partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]]): @@ -288,7 +289,7 @@ class HadoopTableReader( } } -private[hive] object HadoopTableReader extends HiveInspectors { +private[hive] object HadoopTableReader extends HiveInspectors with Logging { /** * Curried. After given an argument for 'path', the resulting JobConf => Unit closure is used to * instantiate a HadoopRDD. 
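For reference, the new unquoteString helper in the HiveQl.scala hunk above behaves as follows (a behavior sketch; the regex extractors only strip quotes that wrap the entire string):

    unquoteString("'field.delim'")  // => field.delim
    unquoteString("\"snappy\"")     // => snappy
    unquoteString("bare")           // => bare (returned unchanged)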
@@ -329,6 +330,8 @@ private[hive] object HadoopTableReader extends HiveInspectors { tableDeser.getObjectInspector).asInstanceOf[StructObjectInspector] } + logDebug(soi.toString) + val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { case (attr, ordinal) => soi.getStructFieldRef(attr.name) -> ordinal }.unzip diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala index a863aa77cb7e0..0a1d761a52f88 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala @@ -17,30 +17,35 @@ package org.apache.spark.sql.hive.client +import java.io.PrintStream +import java.util.{Map => JMap} + import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchTableException} -case class HiveDatabase( +private[hive] case class HiveDatabase( name: String, location: String) -abstract class TableType { val name: String } -case object ExternalTable extends TableType { override val name = "EXTERNAL_TABLE" } -case object IndexTable extends TableType { override val name = "INDEX_TABLE" } -case object ManagedTable extends TableType { override val name = "MANAGED_TABLE" } -case object VirtualView extends TableType { override val name = "VIRTUAL_VIEW" } +private[hive] abstract class TableType { val name: String } +private[hive] case object ExternalTable extends TableType { override val name = "EXTERNAL_TABLE" } +private[hive] case object IndexTable extends TableType { override val name = "INDEX_TABLE" } +private[hive] case object ManagedTable extends TableType { override val name = "MANAGED_TABLE" } +private[hive] case object VirtualView extends TableType { override val name = "VIRTUAL_VIEW" } -case class HiveStorageDescriptor( +// TODO: Use this for Tables and Partitions +private[hive] case class HiveStorageDescriptor( location: String, inputFormat: String, outputFormat: String, - serde: String) + serde: String, + serdeProperties: Map[String, String]) -case class HivePartition( +private[hive] case class HivePartition( values: Seq[String], storage: HiveStorageDescriptor) -case class HiveColumn(name: String, hiveType: String, comment: String) -case class HiveTable( +private[hive] case class HiveColumn(name: String, hiveType: String, comment: String) +private[hive] case class HiveTable( specifiedDatabase: Option[String], name: String, schema: Seq[HiveColumn], @@ -51,7 +56,8 @@ case class HiveTable( location: Option[String] = None, inputFormat: Option[String] = None, outputFormat: Option[String] = None, - serde: Option[String] = None) { + serde: Option[String] = None, + viewText: Option[String] = None) { @transient private[client] var client: ClientInterface = _ @@ -76,13 +82,17 @@ case class HiveTable( * internal and external classloaders for a given version of Hive and thus must expose only * shared classes. */ -trait ClientInterface { +private[hive] trait ClientInterface { /** * Runs a HiveQL command using Hive, returning the results as a list of strings. Each row will * result in one string. */ def runSqlHive(sql: String): Seq[String] + def setOut(stream: PrintStream): Unit + def setInfo(stream: PrintStream): Unit + def setError(stream: PrintStream): Unit + /** Returns the names of all tables in the given database. */ def listTables(dbName: String): Seq[String] @@ -114,6 +124,11 @@ trait ClientInterface { /** Creates a new database with the given name. 
   */
  def createDatabase(database: HiveDatabase): Unit
 
+  /** Returns the specified partition or None if it does not exist. */
+  def getPartitionOption(
+      hTable: HiveTable,
+      partitionSpec: JMap[String, String]): Option[HivePartition]
+
   /** Returns all partitions for the given table. */
   def getAllPartitions(hTable: HiveTable): Seq[HivePartition]
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala
index ea52fea037f1f..6bca9d0179fe3 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.client
 
 import java.io.{BufferedReader, InputStreamReader, File, PrintStream}
 import java.net.URI
-import java.util.{ArrayList => JArrayList}
+import java.util.{ArrayList => JArrayList, Map => JMap, List => JList, Set => JSet}
 
 import scala.collection.JavaConversions._
 import scala.language.reflectiveCalls
@@ -27,6 +27,7 @@ import scala.language.reflectiveCalls
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.hive.metastore.api.Database
 import org.apache.hadoop.hive.conf.HiveConf
+import org.apache.hadoop.hive.metastore.TableType
 import org.apache.hadoop.hive.metastore.api
 import org.apache.hadoop.hive.metastore.api.FieldSchema
 import org.apache.hadoop.hive.ql.metadata
@@ -54,19 +55,13 @@ import org.apache.spark.sql.execution.QueryExecutionException
  * @param config a collection of configuration options that will be added to the hive conf before
  *               opening the hive client.
  */
-class ClientWrapper(
+private[hive] class ClientWrapper(
     version: HiveVersion,
     config: Map[String, String])
   extends ClientInterface
   with Logging
   with ReflectionMagic {
 
-  private val conf = new HiveConf(classOf[SessionState])
-  config.foreach { case (k, v) =>
-    logDebug(s"Hive Config: $k=$v")
-    conf.set(k, v)
-  }
-
   // Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur.
   private val outputBuffer = new java.io.OutputStream {
     var pos: Int = 0
@@ -99,17 +94,31 @@ class ClientWrapper(
     val original = Thread.currentThread().getContextClassLoader
     Thread.currentThread().setContextClassLoader(getClass.getClassLoader)
     val ret = try {
-      val newState = new SessionState(conf)
-      SessionState.start(newState)
-      newState.out = new PrintStream(outputBuffer, true, "UTF-8")
-      newState.err = new PrintStream(outputBuffer, true, "UTF-8")
-      newState
+      val oldState = SessionState.get()
+      if (oldState == null) {
+        val initialConf = new HiveConf(classOf[SessionState])
+        config.foreach { case (k, v) =>
+          logDebug(s"Hive Config: $k=$v")
+          initialConf.set(k, v)
+        }
+        val newState = new SessionState(initialConf)
+        SessionState.start(newState)
+        newState.out = new PrintStream(outputBuffer, true, "UTF-8")
+        newState.err = new PrintStream(outputBuffer, true, "UTF-8")
+        newState
+      } else {
+        oldState
+      }
     } finally {
       Thread.currentThread().setContextClassLoader(original)
     }
     ret
   }
 
+  /** Returns the configuration for the current session. */
+  def conf: HiveConf = SessionState.get().getConf
+
+  // TODO: should be a def?
   private val client = Hive.get(conf)
 
   /**
@@ -133,6 +142,18 @@ class ClientWrapper(
     ret
   }
 
+  def setOut(stream: PrintStream): Unit = withHiveState {
+    state.out = stream
+  }
+
+  def setInfo(stream: PrintStream): Unit = withHiveState {
+    state.info = stream
+  }
+
+  def setError(stream: PrintStream): Unit = withHiveState {
+    state.err = stream
+  }
+
   override def currentDatabase: String = withHiveState {
     state.getCurrentDatabase
   }
@@ -171,14 +192,20 @@ class ClientWrapper(
         partitionColumns = h.getPartCols.map(f => HiveColumn(f.getName, f.getType, f.getComment)),
         properties = h.getParameters.toMap,
         serdeProperties = h.getTTable.getSd.getSerdeInfo.getParameters.toMap,
-        tableType = ManagedTable, // TODO
+        tableType = h.getTableType match {
+          case TableType.MANAGED_TABLE => ManagedTable
+          case TableType.EXTERNAL_TABLE => ExternalTable
+          case TableType.VIRTUAL_VIEW => VirtualView
+          case TableType.INDEX_TABLE => IndexTable
+        },
         location = version match {
           case hive.v12 => Option(h.call[URI]("getDataLocation")).map(_.toString)
           case hive.v13 => Option(h.call[Path]("getDataLocation")).map(_.toString)
         },
         inputFormat = Option(h.getInputFormatClass).map(_.getName),
         outputFormat = Option(h.getOutputFormatClass).map(_.getName),
-        serde = Option(h.getSerializationLib)).withClient(this)
+        serde = Option(h.getSerializationLib),
+        viewText = Option(h.getViewExpandedText)).withClient(this)
     }
     converted
   }
@@ -223,27 +250,40 @@ class ClientWrapper(
     client.alterTable(table.qualifiedName, qlTable)
   }
 
+  private def toHivePartition(partition: metadata.Partition): HivePartition = {
+    val apiPartition = partition.getTPartition
+    HivePartition(
+      values = Option(apiPartition.getValues).map(_.toSeq).getOrElse(Seq.empty),
+      storage = HiveStorageDescriptor(
+        location = apiPartition.getSd.getLocation,
+        inputFormat = apiPartition.getSd.getInputFormat,
+        outputFormat = apiPartition.getSd.getOutputFormat,
+        serde = apiPartition.getSd.getSerdeInfo.getSerializationLib,
+        serdeProperties = apiPartition.getSd.getSerdeInfo.getParameters.toMap))
+  }
+
+  override def getPartitionOption(
+      table: HiveTable,
+      partitionSpec: JMap[String, String]): Option[HivePartition] = withHiveState {
+
+    val qlTable = toQlTable(table)
+    val qlPartition = client.getPartition(qlTable, partitionSpec, false)
+    Option(qlPartition).map(toHivePartition)
+  }
+
   override def getAllPartitions(hTable: HiveTable): Seq[HivePartition] = withHiveState {
     val qlTable = toQlTable(hTable)
     val qlPartitions = version match {
       case hive.v12 =>
-        client.call[metadata.Table, Set[metadata.Partition]]("getAllPartitionsForPruner", qlTable)
+        client.call[metadata.Table, JSet[metadata.Partition]]("getAllPartitionsForPruner", qlTable)
       case hive.v13 =>
-        client.call[metadata.Table, Set[metadata.Partition]]("getAllPartitionsOf", qlTable)
+        client.call[metadata.Table, JSet[metadata.Partition]]("getAllPartitionsOf", qlTable)
     }
-    qlPartitions.map(_.getTPartition).map { p =>
-      HivePartition(
-        values = Option(p.getValues).map(_.toSeq).getOrElse(Seq.empty),
-        storage = HiveStorageDescriptor(
-          location = p.getSd.getLocation,
-          inputFormat = p.getSd.getInputFormat,
-          outputFormat = p.getSd.getOutputFormat,
-          serde = p.getSd.getSerdeInfo.getSerializationLib))
-    }.toSeq
+    qlPartitions.toSeq.map(toHivePartition)
   }
 
   override def listTables(dbName: String): Seq[String] = withHiveState {
-    client.getAllTables
+    client.getAllTables(dbName)
   }
 
   /**
@@ -267,11 +307,12 @@ class ClientWrapper(
     try {
       val cmd_trimmed: String
= cmd.trim() val tokens: Array[String] = cmd_trimmed.split("\\s+") + // The remainder of the command. val cmd_1: String = cmd_trimmed.substring(tokens(0).length()).trim() val proc: CommandProcessor = version match { case hive.v12 => classOf[CommandProcessorFactory] - .callStatic[String, HiveConf, CommandProcessor]("get", cmd_1, conf) + .callStatic[String, HiveConf, CommandProcessor]("get", tokens(0), conf) case hive.v13 => classOf[CommandProcessorFactory] .callStatic[Array[String], HiveConf, CommandProcessor]("get", Array(tokens(0)), conf) @@ -294,7 +335,7 @@ class ClientWrapper( res.toSeq case hive.v13 => val res = new JArrayList[Object] - driver.call[JArrayList[Object], Boolean]("getResults", res) + driver.call[JList[Object], Boolean]("getResults", res) res.map { r => r match { case s: String => s diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 710dbca6e3c66..7f94c93ba49c1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.client import java.io.File -import java.net.URLClassLoader +import java.net.{URL, URLClassLoader} import java.util import scala.language.reflectiveCalls @@ -30,9 +30,10 @@ import org.apache.spark.Logging import org.apache.spark.deploy.SparkSubmitUtils import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.hive.HiveContext /** Factory for `IsolatedClientLoader` with specific versions of hive. */ -object IsolatedClientLoader { +private[hive] object IsolatedClientLoader { /** * Creates isolated Hive client loaders by downloading the requested version from maven. */ @@ -49,7 +50,7 @@ object IsolatedClientLoader { case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13 } - private def downloadVersion(version: HiveVersion): Seq[File] = { + private def downloadVersion(version: HiveVersion): Seq[URL] = { val hiveArtifacts = (Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") ++ (if (version.hasBuiltinsJar) "hive-builtins" :: Nil else Nil)) @@ -72,10 +73,10 @@ object IsolatedClientLoader { tempDir.mkdir() allFiles.foreach(f => FileUtils.copyFileToDirectory(f, tempDir)) - tempDir.listFiles() + tempDir.listFiles().map(_.toURL) } - private def resolvedVersions = new scala.collection.mutable.HashMap[HiveVersion, Seq[File]] + private def resolvedVersions = new scala.collection.mutable.HashMap[HiveVersion, Seq[URL]] } /** @@ -99,9 +100,9 @@ object IsolatedClientLoader { * @param baseClassLoader The spark classloader that is used to load shared classes. * */ -class IsolatedClientLoader( +private[hive] class IsolatedClientLoader( val version: HiveVersion, - val execJars: Seq[File] = Seq.empty, + val execJars: Seq[URL] = Seq.empty, val config: Map[String, String] = Map.empty, val isolationOn: Boolean = true, val rootClassLoader: ClassLoader = ClassLoader.getSystemClassLoader.getParent.getParent, @@ -112,7 +113,7 @@ class IsolatedClientLoader( assert(Try(baseClassLoader.loadClass("org.apache.hive.HiveConf")).isFailure) /** All jars used by the hive specific classloader. 
*/ - protected def allJars = execJars.map(_.toURI.toURL).toArray + protected def allJars = execJars.toArray protected def isSharedClass(name: String): Boolean = name.contains("slf4j") || @@ -166,6 +167,12 @@ class IsolatedClientLoader( .getConstructors.head .newInstance(version, config) .asInstanceOf[ClientInterface] + } catch { + case ReflectionException(cnf: NoClassDefFoundError) => + throw new ClassNotFoundException( + s"$cnf when creating Hive client using classpath: ${execJars.mkString(", ")}\n" + + "Please make sure that jars for your version of hive and hadoop are included in the " + + s"paths passed to ${HiveContext.HIVE_METASTORE_JARS}.") } finally { Thread.currentThread.setContextClassLoader(baseClassLoader) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala index 90d03049356b5..c600b158c5460 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala @@ -19,6 +19,14 @@ package org.apache.spark.sql.hive.client import scala.reflect._ +/** Unwraps reflection exceptions. */ +private[client] object ReflectionException { + def unapply(a: Throwable): Option[Throwable] = a match { + case ite: java.lang.reflect.InvocationTargetException => Option(ite.getCause) + case _ => None + } +} + /** * Provides implicit functions on any object for calling methods reflectively. */ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala index 76a1965f3cb25..91e6ac4032204 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala @@ -24,8 +24,8 @@ import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.execution.RunnableCommand -import org.apache.spark.sql.hive.HiveContext -import org.apache.spark.sql.hive.MetastoreRelation +import org.apache.spark.sql.hive.client.{HiveTable, HiveColumn} +import org.apache.spark.sql.hive.{HiveContext, MetastoreRelation, HiveMetastoreTypes} /** * Create table and insert the query result into it. 
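Tying the loader pieces together, a hedged sketch of constructing an isolated client directly, mirroring HiveContext's "maven" path above (usable only from within org.apache.spark.sql.hive now that these classes are private[hive]; the Derby URL mirrors newTemporaryConfiguration()):

    import org.apache.spark.sql.hive.client.{ClientInterface, IsolatedClientLoader}

    // Downloads the requested hive artifacts via maven and backs the client
    // with a throwaway local Derby metastore.
    val loader = IsolatedClientLoader.forVersion(
      "0.13.1",
      Map("javax.jdo.option.ConnectionURL" ->
        "jdbc:derby:;databaseName=/tmp/demo_metastore;create=true"))
    val client: ClientInterface = loader.client
    client.listTables("default").foreach(println)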
@@ -39,17 +39,34 @@ import org.apache.spark.sql.hive.MetastoreRelation */ private[hive] case class CreateTableAsSelect( - database: String, - tableName: String, + tableDesc: HiveTable, query: LogicalPlan, - allowExisting: Boolean, - desc: Option[CreateTableDesc]) extends RunnableCommand { + allowExisting: Boolean) + extends RunnableCommand { + + def database: String = tableDesc.database + def tableName: String = tableDesc.name override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] lazy val metastoreRelation: MetastoreRelation = { - // Create Hive Table - hiveContext.catalog.createTable(database, tableName, query.output, allowExisting, desc) + import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe + import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + import org.apache.hadoop.io.Text + import org.apache.hadoop.mapred.TextInputFormat + + val withSchema = + tableDesc.copy( + schema = + query.output.map(c => + HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)), + inputFormat = + tableDesc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)), + outputFormat = + tableDesc.outputFormat + .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)), + serde = tableDesc.serde.orElse(Some(classOf[LazySimpleSerDe].getName()))) + hiveContext.catalog.client.createTable(withSchema) // Get the Metastore Relation hiveContext.catalog.lookupRelation(Seq(database, tableName), None) match { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 89995a91b1a92..de8954d5dec99 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -200,9 +200,7 @@ case class InsertIntoHiveTable( orderedPartitionSpec.put(entry.getName,partitionSpec.get(entry.getName).getOrElse("")) } val partVals = MetaStoreUtils.getPvals(table.hiveQlTable.getPartCols, partitionSpec) - catalog.synchronized { - catalog.client.validatePartitionNameCharacters(partVals) - } + // inheritTableSpecs is set to true. It should be set to false for a IMPORT query // which is currently considered as a Hive native command. 
val inheritTableSpecs = true @@ -211,7 +209,7 @@ case class InsertIntoHiveTable( if (numDynamicPartitions > 0) { catalog.synchronized { catalog.client.loadDynamicPartitions( - outputPath, + outputPath.toString, qualifiedTableName, orderedPartitionSpec, overwrite, @@ -224,31 +222,28 @@ case class InsertIntoHiveTable( // ifNotExists is only valid with static partition, refer to // https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML#LanguageManualDML-InsertingdataintoHiveTablesfromqueries // scalastyle:on - val oldPart = catalog.synchronized { - catalog.client.getPartition( - catalog.client.getTable(qualifiedTableName), partitionSpec, false) - } - if (oldPart == null || !ifNotExists) { - catalog.synchronized { + val oldPart = + catalog.client.getPartitionOption( + catalog.client.getTable(table.databaseName, table.tableName), + partitionSpec) + + if (oldPart.isEmpty || !ifNotExists) { catalog.client.loadPartition( - outputPath, + outputPath.toString, qualifiedTableName, orderedPartitionSpec, overwrite, holdDDLTime, inheritTableSpecs, isSkewedStoreAsSubdir) - } } } } else { - catalog.synchronized { - catalog.client.loadTable( - outputPath, - qualifiedTableName, - overwrite, - holdDDLTime) - } + catalog.client.loadTable( + outputPath.toString, // TODO: URI + qualifiedTableName, + overwrite, + holdDDLTime) } // Invalidate the cache. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index a40a1e53117cd..abab1a223a43a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * Analyzes the given table in the current database to generate statistics, which will be @@ -84,8 +85,20 @@ case class AddJar(path: String) extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] + val currentClassLoader = Utils.getContextOrSparkClassLoader + + // Add jar to current context + val jarURL = new java.io.File(path).toURL + val newClassLoader = new java.net.URLClassLoader(Array(jarURL), currentClassLoader) + Thread.currentThread.setContextClassLoader(newClassLoader) + org.apache.hadoop.hive.ql.metadata.Hive.get().getConf().setClassLoader(newClassLoader) + + // Add jar to isolated hive classloader hiveContext.runSqlHive(s"ADD JAR $path") + + // Add jar to executors hiveContext.sparkContext.addJar(path) + Seq(Row(0)) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index ca84b43a998b8..1f40a5340c2ce 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.test import java.io.File import java.util.{Set => JavaSet} +import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.ql.exec.FunctionRegistry import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContainerOutputFormat} import org.apache.hadoop.hive.ql.metadata.Table @@ -62,6 +63,8 @@ object TestHive class 
TestHiveContext(sc: SparkContext) extends HiveContext(sc) { self => + import HiveContext._ + // By clearing the port we force Spark to pick a new one. This allows us to rerun tests // without restarting the JVM. System.clearProperty("spark.hostPort") @@ -70,24 +73,16 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { hiveconf.set("hive.plan.serialization.format", "javaXML") lazy val warehousePath = Utils.createTempDir() - lazy val metastorePath = Utils.createTempDir() /** Sets up the system initially or after a RESET command */ - protected def configure(): Unit = { - warehousePath.delete() - metastorePath.delete() - setConf("javax.jdo.option.ConnectionURL", - s"jdbc:derby:;databaseName=$metastorePath;create=true") - setConf("hive.metastore.warehouse.dir", warehousePath.toString) - } + protected override def configure(): Map[String, String] = + newTemporaryConfiguration() ++ Map("hive.metastore.warehouse.dir" -> warehousePath.toString) val testTempDir = Utils.createTempDir() // For some hive test case which contain ${system:test.tmp.dir} System.setProperty("test.tmp.dir", testTempDir.getCanonicalPath) - configure() // Must be called before initializing the catalog below. - /** The location of the compiled hive distribution */ lazy val hiveHome = envVarToFile("HIVE_HOME") /** The location of the hive source code. */ @@ -195,6 +190,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { * A list of test tables and the DDL required to initialize them. A test table is loaded on * demand when a query are run against it. */ + @transient lazy val testTables = new mutable.HashMap[String, TestTable]() def registerTestTable(testTable: TestTable): Unit = { @@ -204,6 +200,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { // The test tables that are defined in the Hive QTestUtil. // /itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java // https://github.com/apache/hive/blob/branch-0.13/data/scripts/q_test_init.sql + @transient val hiveQTestUtilTables = Seq( TestTable("src", "CREATE TABLE src (key INT, value STRING)".cmd, @@ -236,16 +233,18 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { import org.apache.hadoop.mapred.{SequenceFileInputFormat, SequenceFileOutputFormat} import org.apache.thrift.protocol.TBinaryProtocol - val srcThrift = new Table("default", "src_thrift") - srcThrift.setFields(Nil) - srcThrift.setInputFormatClass(classOf[SequenceFileInputFormat[_,_]].getName) - // In Hive, SequenceFileOutputFormat will be substituted by HiveSequenceFileOutputFormat. 
- srcThrift.setOutputFormatClass(classOf[SequenceFileOutputFormat[_,_]].getName) - srcThrift.setSerializationLib(classOf[ThriftDeserializer].getName) - srcThrift.setSerdeParam("serialization.class", classOf[Complex].getName) - srcThrift.setSerdeParam("serialization.format", classOf[TBinaryProtocol].getName) - catalog.client.createTable(srcThrift) - + runSqlHive( + s""" + |CREATE TABLE src_thrift(fake INT) + |ROW FORMAT SERDE '${classOf[ThriftDeserializer].getName}' + |WITH SERDEPROPERTIES( + | 'serialization.class'='${classOf[Complex].getName}', + | 'serialization.format'='${classOf[TBinaryProtocol].getName}' + |) + |STORED AS + |INPUTFORMAT '${classOf[SequenceFileInputFormat[_,_]].getName}' + |OUTPUTFORMAT '${classOf[SequenceFileOutputFormat[_,_]].getName}' + """.stripMargin) runSqlHive( s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/complex.seq")}' INTO TABLE src_thrift") @@ -367,7 +366,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { if (!(loadedTables contains name)) { // Marks the table as loaded first to prevent infinite mutually recursive table loading. loadedTables += name - logInfo(s"Loading test table $name") + logDebug(s"Loading test table $name") val createCmds = testTables.get(name).map(_.commands).getOrElse(sys.error(s"Unknown test table $name")) createCmds.foreach(_()) @@ -384,9 +383,6 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { */ protected val originalUdfs: JavaSet[String] = FunctionRegistry.getFunctionNames - // Database default may not exist in 0.13.1, create it if not exist - HiveShim.createDefaultDBIfNeeded(this) - /** * Resets the test instance by deleting any tables that have been created. * TODO: also clear out UDFs, views, etc. @@ -401,24 +397,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { cacheManager.clearCache() loadedTables.clear() catalog.cachedDataSourceTables.invalidateAll() - catalog.client.getAllTables("default").foreach { t => - logDebug(s"Deleting table $t") - val table = catalog.client.getTable("default", t) - - catalog.client.getIndexes("default", t, 255).foreach { index => - catalog.client.dropIndex("default", t, index.getIndexName, true) - } - - if (!table.isIndexTable) { - catalog.client.dropTable("default", t) - } - } - - catalog.client.getAllDatabases.filterNot(_ == "default").foreach { db => - logDebug(s"Dropping Database: $db") - catalog.client.dropDatabase(db, true, false, true) - } - + catalog.client.reset() catalog.unregisterAllTables() FunctionRegistry.getFunctionNames.filterNot(originalUdfs.contains(_)).foreach { udfName => @@ -429,7 +408,8 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { hiveconf.set("fs.default.name", new File(".").toURI.toString) // It is important that we RESET first as broken hooks that might have been set could break // other sql exec here. - runSqlHive("RESET") + executionHive.runSqlHive("RESET") + metadataHive.runSqlHive("RESET") // For some reason, RESET does not reset the following variables... // https://issues.apache.org/jira/browse/HIVE-9004 runSqlHive("set hive.table.parameters.default=") @@ -437,7 +417,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { runSqlHive("set datanucleus.cache.collections.lazy=true") // Lots of tests fail if we do not change the partition whitelist from the default. 
runSqlHive("set hive.metastore.partition.name.whitelist.pattern=.*") - configure() + + configure().foreach { + case (k, v) => + metadataHive.runSqlHive(s"SET $k=$v") + } runSqlHive("USE default") diff --git a/sql/hive/src/test/resources/log4j.properties b/sql/hive/src/test/resources/log4j.properties index 5bc08062d30eb..92eaf1f2795ba 100644 --- a/sql/hive/src/test/resources/log4j.properties +++ b/sql/hive/src/test/resources/log4j.properties @@ -33,7 +33,7 @@ log4j.appender.FA.layout=org.apache.log4j.PatternLayout log4j.appender.FA.layout.ConversionPattern=%d{HH:mm:ss.SSS} %t %p %c{1}: %m%n # Set the logger level of File Appender to WARN -log4j.appender.FA.Threshold = INFO +log4j.appender.FA.Threshold = DEBUG # Some packages are noisy for no good reason. log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala index d960a30e00738..30f5313d2b812 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala @@ -17,12 +17,11 @@ package org.apache.spark.sql.hive -import java.io.{OutputStream, PrintStream} - import scala.util.Try import org.scalatest.BeforeAndAfter +import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.{AnalysisException, QueryTest} @@ -109,25 +108,6 @@ class ErrorPositionSuite extends QueryTest with BeforeAndAfter { "SELECT 1 + array(1)", "1 + array") } - /** Hive can be very noisy, messing up the output of our tests. */ - private def quietly[A](f: => A): A = { - val origErr = System.err - val origOut = System.out - try { - System.setErr(new PrintStream(new OutputStream { - def write(b: Int) = {} - })) - System.setOut(new PrintStream(new OutputStream { - def write(b: Int) = {} - })) - - f - } finally { - System.setErr(origErr) - System.setOut(origOut) - } - } - /** * Creates a test that checks to see if the error thrown when analyzing a given query includes * the location of the given token in the query string. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 0538aa203c5a0..47c60f651d14c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -31,6 +31,7 @@ import org.apache.hadoop.mapred.InvalidInputException import org.apache.spark.sql._ import org.apache.spark.util.Utils import org.apache.spark.sql.types._ +import org.apache.spark.sql.hive.client.{HiveTable, ManagedTable} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.parquet.ParquetRelation2 @@ -686,16 +687,21 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { test("SPARK-6655 still support a schema stored in spark.sql.sources.schema") { val tableName = "spark6655" val schema = StructType(StructField("int", IntegerType, true) :: Nil) - // Manually create the metadata in metastore. 
- val tbl = new Table("default", tableName) - tbl.setProperty("spark.sql.sources.provider", "json") - tbl.setProperty("spark.sql.sources.schema", schema.json) - tbl.setProperty("EXTERNAL", "FALSE") - tbl.setTableType(TableType.MANAGED_TABLE) - tbl.setSerdeParam("path", catalog.hiveDefaultTableFilePath(tableName)) - catalog.synchronized { - catalog.client.createTable(tbl) - } + + val hiveTable = HiveTable( + specifiedDatabase = Some("default"), + name = tableName, + schema = Seq.empty, + partitionColumns = Seq.empty, + properties = Map( + "spark.sql.sources.provider" -> "json", + "spark.sql.sources.schema" -> schema.json, + "EXTERNAL" -> "FALSE"), + tableType = ManagedTable, + serdeProperties = Map( + "path" -> catalog.hiveDefaultTableFilePath(tableName))) + + catalog.client.createTable(hiveTable) invalidateTable(tableName) val actualSchema = table(tableName).schema diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala index d6ddd539d159d..8afe5459d4f1b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala @@ -26,8 +26,10 @@ import org.apache.spark.sql.hive.test.TestHive class SerializationSuite extends FunSuite { test("[SPARK-5840] HiveContext should be serializable") { - val hiveContext = new HiveContext(TestHive.sparkContext) + val hiveContext = TestHive hiveContext.hiveconf - new JavaSerializer(new SparkConf()).newInstance().serialize(hiveContext) + val serializer = new JavaSerializer(new SparkConf()).newInstance() + val bytes = serializer.serialize(hiveContext) + val deSer = serializer.deserialize[AnyRef](bytes) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 81e77ba257bf1..321dc8d7322b8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -22,9 +22,13 @@ import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.util.Utils import org.scalatest.FunSuite +/** + * A simple set of tests that call the methods of a hive ClientInterface, loading different version + * of hive from maven central. These tests are simple in that they are mostly just testing to make + * sure that reflective calls are not throwing NoSuchMethod error, but the actually functionallity + * is not fully tested. + */ class VersionsSuite extends FunSuite with Logging { - val testType = "derby" - private def buildConf() = { lazy val warehousePath = Utils.createTempDir() lazy val metastorePath = Utils.createTempDir() @@ -50,6 +54,14 @@ class VersionsSuite extends FunSuite with Logging { causes } + private val emptyDir = Utils.createTempDir().getCanonicalPath + + private def partSpec = { + val hashMap = new java.util.LinkedHashMap[String, String] + hashMap.put("key", "1") + hashMap + } + // Its actually pretty easy to mess things up and have all of your tests "pass" by accidentally // connecting to an auto-populated, in-process metastore. Let's make sure we are getting the // versions right by forcing a known compatibility failure. 
@@ -66,10 +78,9 @@ class VersionsSuite extends FunSuite with Logging { private var client: ClientInterface = null versions.foreach { version => - test(s"$version: listTables") { + test(s"$version: create client") { client = null client = IsolatedClientLoader.forVersion(version, buildConf()).client - client.listTables("default") } test(s"$version: createDatabase") { @@ -101,5 +112,64 @@ class VersionsSuite extends FunSuite with Logging { test(s"$version: getTable") { client.getTable("default", "src") } + + test(s"$version: listTables") { + assert(client.listTables("default") === Seq("src")) + } + + test(s"$version: currentDatabase") { + assert(client.currentDatabase === "default") + } + + test(s"$version: getDatabase") { + client.getDatabase("default") + } + + test(s"$version: alterTable") { + client.alterTable(client.getTable("default", "src")) + } + + test(s"$version: set command") { + client.runSqlHive("SET spark.sql.test.key=1") + } + + test(s"$version: create partitioned table DDL") { + client.runSqlHive("CREATE TABLE src_part (value INT) PARTITIONED BY (key INT)") + client.runSqlHive("ALTER TABLE src_part ADD PARTITION (key = '1')") + } + + test(s"$version: getPartitions") { + client.getAllPartitions(client.getTable("default", "src_part")) + } + + test(s"$version: loadPartition") { + client.loadPartition( + emptyDir, + "default.src_part", + partSpec, + false, + false, + false, + false) + } + + test(s"$version: loadTable") { + client.loadTable( + emptyDir, + "src", + false, + false) + } + + test(s"$version: loadDynamicPartitions") { + client.loadDynamicPartitions( + emptyDir, + "default.src_part", + partSpec, + false, + 1, + false, + false) + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index a3eacbd4e3981..9c056e493bfde 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -300,6 +300,8 @@ abstract class HiveComparisonTest val hiveQueries = queryList.map(new TestHive.QueryExecution(_)) // Make sure we can at least parse everything before attempting hive execution. + // Note this must only look at the logical plan as we might not be able to analyze if + // other DDL has not been executed yet. 
hiveQueries.foreach(_.logical)
 val computedResults = (queryList.zipWithIndex, hiveQueries, hiveCacheFiles).zipped.map {
 case ((queryString, i), hiveQuery, cachedAnswerFile)=>
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index ac10b173307d8..7d728fe87bda7 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -900,7 +900,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
 |DROP TABLE IF EXISTS dynamic_part_table;
 """.stripMargin)
- test("Dynamic partition folder layout") {
+ ignore("Dynamic partition folder layout") {
 sql("DROP TABLE IF EXISTS dynamic_part_table")
 sql("CREATE TABLE dynamic_part_table(intcol INT) PARTITIONED BY (partcol1 INT, partcol2 INT)")
 sql("SET hive.exec.dynamic.partition.mode=nonstrict")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
index 45f10e2fe64aa..de6a41ce5bfcb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
@@ -150,20 +150,21 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter {
 val (actualScannedColumns, actualPartValues) = plan.collect {
 case p @ HiveTableScan(columns, relation, _) =>
 val columnNames = columns.map(_.name)
- val partValues = p.prunePartitions(relation.hiveQlPartitions).map(_.getValues)
+ val partValues = if (relation.table.isPartitioned) {
+ p.prunePartitions(relation.hiveQlPartitions).map(_.getValues)
+ } else {
+ Seq.empty
+ }
 (columnNames, partValues)
 }.head
 assert(actualOutputColumns === expectedOutputColumns, "Output columns mismatch")
 assert(actualScannedColumns === expectedScannedColumns, "Scanned columns mismatch")
- assert(
- actualPartValues.length === expectedPartValues.length,
- "Partition value count mismatches")
+ val actualPartitions = actualPartValues.map(_.toSeq.mkString(",")).sorted
+ val expectedPartitions = expectedPartValues.map(_.mkString(",")).sorted
- for ((actual, expected) <- actualPartValues.zip(expectedPartValues)) {
- assert(actual sameElements expected, "Partition values mismatch")
- }
+ assert(actualPartitions === expectedPartitions, "Partitions selected do not match")
 }
 // Creates a query test to compare query results generated by Hive and Catalyst.
From 92f8f803a68e0c16771e9793098c6d76dfdf99af Mon Sep 17 00:00:00 2001
From: Shuo Xiang
Date: Thu, 7 May 2015 20:55:08 -0700
Subject: [PATCH 39/48] [SPARK-7452] [MLLIB] fix bug in topBykey and update test

The toArray function of BoundedPriorityQueue does not necessarily preserve order, so topByKey must sort explicitly rather than merely reverse. This adds a counter-example test that fails under the original implementation; see the sketch below.
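For completeness, a minimal editorial sketch of the underlying issue (illustrative only: it uses Scala's standard PriorityQueue rather than Spark's BoundedPriorityQueue, and the exact heap layout shown is hypothetical, since toArray's order is unspecified):

```
import scala.collection.mutable.PriorityQueue

// A min-heap keeping the "top" elements: dequeue order is well defined,
// but the backing array layout returned by toArray is not.
val heap = PriorityQueue(7, 3, 6, 1, 2)(Ordering.Int.reverse)
heap.toArray                              // heap layout, e.g. Array(1, 2, 6, 7, 3) -- not sorted
heap.toArray.reverse                      // therefore not sorted either
heap.toArray.sorted(Ordering.Int.reverse) // Array(7, 6, 3, 2, 1) -- sort explicitly
```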
Author: Shuo Xiang

Closes #5990 from coderxiang/topbykey-test and squashes the following commits:

98804c9 [Shuo Xiang] fix bug in topBykey and update test
---
 .../org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala | 2 +-
 .../apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
index 5af55aaf84802..1b93e2d764c69 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
@@ -46,7 +46,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se
 combOp = (queue1, queue2) => {
 queue1 ++= queue2
 }
- ).mapValues(_.toArray.reverse) // This is an min-heap, so we reverse the order.
+ ).mapValues(_.toArray.sorted(ord.reverse)) // This is a min-heap, so sort by the reversed ordering.
 }
 }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
index cb8fe4dba96f5..57216e8eb4a55 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
@@ -24,13 +24,14 @@ import org.apache.spark.mllib.rdd.MLPairRDDFunctions._
 class MLPairRDDFunctionsSuite extends FunSuite with MLlibTestSparkContext {
 test("topByKey") {
- val topMap = sc.parallelize(Array((1, 1), (1, 2), (3, 2), (3, 7), (5, 1), (3, 5)), 2)
- .topByKey(2)
+ val topMap = sc.parallelize(Array((1, 7), (1, 3), (1, 6), (1, 1), (1, 2), (3, 2), (3, 7), (5,
+ 1), (3, 5)), 2)
+ .topByKey(5)
 .collectAsMap()
 assert(topMap.size === 3)
- assert(topMap(1) === Array(2, 1))
- assert(topMap(3) === Array(7, 5))
+ assert(topMap(1) === Array(7, 6, 3, 2, 1))
+ assert(topMap(3) === Array(7, 5, 2))
 assert(topMap(5) === Array(1))
 }
 }

From 3af423c92f117b5dd4dc6832dc50911cedb29abc Mon Sep 17 00:00:00 2001
From: Yin Huai
Date: Thu, 7 May 2015 20:59:42 -0700
Subject: [PATCH 40/48] [SPARK-6986] [SQL] Use Serializer2 in more cases.

With https://github.com/apache/spark/commit/0a2b15ce43cf6096e1a7ae060b7c8a4010ce3b92, the serialization stream and deserialization stream have enough information to determine whether they are handling a key-value pair, a key, or a value. It is safe to use `SparkSqlSerializer2` in more cases.

Author: Yin Huai

Closes #5849 from yhuai/serializer2MoreCases and squashes the following commits:

53a5eaa [Yin Huai] Josh's comments.
487f540 [Yin Huai] Use BufferedOutputStream.
8385f95 [Yin Huai] Always create a new row at the deserialization side to work with sort merge join.
c7e2129 [Yin Huai] Update tests.
4513d13 [Yin Huai] Use Serializer2 in more places.
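A note on the "Always create a new row at the deserialization side" commit above: when the shuffle specifies a key ordering, downstream operators such as sort merge join buffer deserialized rows, so handing out a single reused mutable row would silently corrupt the buffered entries. A minimal editorial sketch of that aliasing hazard (hypothetical stand-in code, not Spark's API):

```
// A single reused buffer stands in for a reused mutable row.
val reused = new Array[Any](1)
def readNext(v: Int): Array[Any] = { reused(0) = v; reused }

// A buffering consumer keeps references, not copies, so every buffered
// entry ends up aliasing the same object.
val buffered = Seq(1, 2, 3).map(readNext)
assert(buffered.forall(_(0) == 3)) // all entries read back the last value written
```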
--- .../apache/spark/sql/execution/Exchange.scala | 23 ++---- .../sql/execution/SparkSqlSerializer2.scala | 74 ++++++++++++------- .../execution/SparkSqlSerializer2Suite.scala | 30 ++++---- 3 files changed, 69 insertions(+), 58 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala index 5b2e46962cd3b..f0d54cd6cd94f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala @@ -84,18 +84,8 @@ case class Exchange( def serializer( keySchema: Array[DataType], valueSchema: Array[DataType], + hasKeyOrdering: Boolean, numPartitions: Int): Serializer = { - // In ExternalSorter's spillToMergeableFile function, key-value pairs are written out - // through write(key) and then write(value) instead of write((key, value)). Because - // SparkSqlSerializer2 assumes that objects passed in are Product2, we cannot safely use - // it when spillToMergeableFile in ExternalSorter will be used. - // So, we will not use SparkSqlSerializer2 when - // - Sort-based shuffle is enabled and the number of reducers (numPartitions) is greater - // then the bypassMergeThreshold; or - // - newOrdering is defined. - val cannotUseSqlSerializer2 = - (sortBasedShuffleOn && numPartitions > bypassMergeThreshold) || newOrdering.nonEmpty - // It is true when there is no field that needs to be write out. // For now, we will not use SparkSqlSerializer2 when noField is true. val noField = @@ -104,14 +94,13 @@ case class Exchange( val useSqlSerializer2 = child.sqlContext.conf.useSqlSerializer2 && // SparkSqlSerializer2 is enabled. - !cannotUseSqlSerializer2 && // Safe to use Serializer2. SparkSqlSerializer2.support(keySchema) && // The schema of key is supported. SparkSqlSerializer2.support(valueSchema) && // The schema of value is supported. 
!noField val serializer = if (useSqlSerializer2) { logInfo("Using SparkSqlSerializer2.") - new SparkSqlSerializer2(keySchema, valueSchema) + new SparkSqlSerializer2(keySchema, valueSchema, hasKeyOrdering) } else { logInfo("Using SparkSqlSerializer.") new SparkSqlSerializer(sparkConf) @@ -154,7 +143,8 @@ case class Exchange( } val keySchema = expressions.map(_.dataType).toArray val valueSchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(keySchema, valueSchema, numPartitions)) + shuffled.setSerializer( + serializer(keySchema, valueSchema, newOrdering.nonEmpty, numPartitions)) shuffled.map(_._2) @@ -179,7 +169,8 @@ case class Exchange( new ShuffledRDD[Row, Null, Null](rdd, part) } val keySchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(keySchema, null, numPartitions)) + shuffled.setSerializer( + serializer(keySchema, null, newOrdering.nonEmpty, numPartitions)) shuffled.map(_._1) @@ -199,7 +190,7 @@ case class Exchange( val partitioner = new HashPartitioner(1) val shuffled = new ShuffledRDD[Null, Row, Row](rdd, partitioner) val valueSchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(null, valueSchema, 1)) + shuffled.setSerializer(serializer(null, valueSchema, false, 1)) shuffled.map(_._2) case _ => sys.error(s"Exchange not implemented for $newPartitioning") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala index 35ad987eb1a63..256d527d7b636 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala @@ -27,7 +27,7 @@ import scala.reflect.ClassTag import org.apache.spark.serializer._ import org.apache.spark.Logging import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.expressions.SpecificMutableRow +import org.apache.spark.sql.catalyst.expressions.{SpecificMutableRow, MutableRow, GenericMutableRow} import org.apache.spark.sql.types._ /** @@ -49,9 +49,9 @@ private[sql] class Serializer2SerializationStream( out: OutputStream) extends SerializationStream with Logging { - val rowOut = new DataOutputStream(out) - val writeKeyFunc = SparkSqlSerializer2.createSerializationFunction(keySchema, rowOut) - val writeValueFunc = SparkSqlSerializer2.createSerializationFunction(valueSchema, rowOut) + private val rowOut = new DataOutputStream(new BufferedOutputStream(out)) + private val writeKeyFunc = SparkSqlSerializer2.createSerializationFunction(keySchema, rowOut) + private val writeValueFunc = SparkSqlSerializer2.createSerializationFunction(valueSchema, rowOut) override def writeObject[T: ClassTag](t: T): SerializationStream = { val kv = t.asInstanceOf[Product2[Row, Row]] @@ -86,31 +86,44 @@ private[sql] class Serializer2SerializationStream( private[sql] class Serializer2DeserializationStream( keySchema: Array[DataType], valueSchema: Array[DataType], + hasKeyOrdering: Boolean, in: InputStream) extends DeserializationStream with Logging { - val rowIn = new DataInputStream(new BufferedInputStream(in)) + private val rowIn = new DataInputStream(new BufferedInputStream(in)) + + private def rowGenerator(schema: Array[DataType]): () => (MutableRow) = { + if (schema == null) { + () => null + } else { + if (hasKeyOrdering) { + // We have key ordering specified in a ShuffledRDD, it is not safe to reuse a mutable row. 
+ () => new GenericMutableRow(schema.length) + } else { + // It is safe to reuse the mutable row. + val mutableRow = new SpecificMutableRow(schema) + () => mutableRow + } + } + } - val key = if (keySchema != null) new SpecificMutableRow(keySchema) else null - val value = if (valueSchema != null) new SpecificMutableRow(valueSchema) else null - val readKeyFunc = SparkSqlSerializer2.createDeserializationFunction(keySchema, rowIn, key) - val readValueFunc = SparkSqlSerializer2.createDeserializationFunction(valueSchema, rowIn, value) + // Functions used to return rows for key and value. + private val getKey = rowGenerator(keySchema) + private val getValue = rowGenerator(valueSchema) + // Functions used to read a serialized row from the InputStream and deserialize it. + private val readKeyFunc = SparkSqlSerializer2.createDeserializationFunction(keySchema, rowIn) + private val readValueFunc = SparkSqlSerializer2.createDeserializationFunction(valueSchema, rowIn) override def readObject[T: ClassTag](): T = { - readKeyFunc() - readValueFunc() - - (key, value).asInstanceOf[T] + (readKeyFunc(getKey()), readValueFunc(getValue())).asInstanceOf[T] } override def readKey[T: ClassTag](): T = { - readKeyFunc() - key.asInstanceOf[T] + readKeyFunc(getKey()).asInstanceOf[T] } override def readValue[T: ClassTag](): T = { - readValueFunc() - value.asInstanceOf[T] + readValueFunc(getValue()).asInstanceOf[T] } override def close(): Unit = { @@ -118,9 +131,10 @@ private[sql] class Serializer2DeserializationStream( } } -private[sql] class ShuffleSerializerInstance( +private[sql] class SparkSqlSerializer2Instance( keySchema: Array[DataType], - valueSchema: Array[DataType]) + valueSchema: Array[DataType], + hasKeyOrdering: Boolean) extends SerializerInstance { def serialize[T: ClassTag](t: T): ByteBuffer = @@ -137,7 +151,7 @@ private[sql] class ShuffleSerializerInstance( } def deserializeStream(s: InputStream): DeserializationStream = { - new Serializer2DeserializationStream(keySchema, valueSchema, s) + new Serializer2DeserializationStream(keySchema, valueSchema, hasKeyOrdering, s) } } @@ -148,12 +162,16 @@ private[sql] class ShuffleSerializerInstance( * The schema of keys is represented by `keySchema` and that of values is represented by * `valueSchema`. */ -private[sql] class SparkSqlSerializer2(keySchema: Array[DataType], valueSchema: Array[DataType]) +private[sql] class SparkSqlSerializer2( + keySchema: Array[DataType], + valueSchema: Array[DataType], + hasKeyOrdering: Boolean) extends Serializer with Logging with Serializable{ - def newInstance(): SerializerInstance = new ShuffleSerializerInstance(keySchema, valueSchema) + def newInstance(): SerializerInstance = + new SparkSqlSerializer2Instance(keySchema, valueSchema, hasKeyOrdering) override def supportsRelocationOfSerializedObjects: Boolean = { // SparkSqlSerializer2 is stateless and writes no stream headers @@ -323,11 +341,11 @@ private[sql] object SparkSqlSerializer2 { */ def createDeserializationFunction( schema: Array[DataType], - in: DataInputStream, - mutableRow: SpecificMutableRow): () => Unit = { - () => { - // If the schema is null, the returned function does nothing when it get called. 
- if (schema != null) { + in: DataInputStream): (MutableRow) => Row = { + if (schema == null) { + (mutableRow: MutableRow) => null + } else { + (mutableRow: MutableRow) => { var i = 0 while (i < schema.length) { schema(i) match { @@ -440,6 +458,8 @@ private[sql] object SparkSqlSerializer2 { } i += 1 } + + mutableRow } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala index 27f063d73a9a9..15337c4045436 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala @@ -148,6 +148,15 @@ abstract class SparkSqlSerializer2Suite extends QueryTest with BeforeAndAfterAll table("shuffle").collect()) } + test("key schema is null") { + val aggregations = allColumns.split(",").map(c => s"COUNT($c)").mkString(",") + val df = sql(s"SELECT $aggregations FROM shuffle") + checkSerializer(df.queryExecution.executedPlan, serializerClass) + checkAnswer( + df, + Row(1000, 1000, 0, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000)) + } + test("value schema is null") { val df = sql(s"SELECT col0 FROM shuffle ORDER BY col0") checkSerializer(df.queryExecution.executedPlan, serializerClass) @@ -167,29 +176,20 @@ class SparkSqlSerializer2SortShuffleSuite extends SparkSqlSerializer2Suite { override def beforeAll(): Unit = { super.beforeAll() // Sort merge will not be triggered. - sql("set spark.sql.shuffle.partitions = 200") - } - - test("key schema is null") { - val aggregations = allColumns.split(",").map(c => s"COUNT($c)").mkString(",") - val df = sql(s"SELECT $aggregations FROM shuffle") - checkSerializer(df.queryExecution.executedPlan, serializerClass) - checkAnswer( - df, - Row(1000, 1000, 0, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000)) + val bypassMergeThreshold = + sparkContext.conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + sql(s"set spark.sql.shuffle.partitions=${bypassMergeThreshold-1}") } } /** For now, we will use SparkSqlSerializer for sort based shuffle with sort merge. */ class SparkSqlSerializer2SortMergeShuffleSuite extends SparkSqlSerializer2Suite { - // We are expecting SparkSqlSerializer. - override val serializerClass: Class[Serializer] = - classOf[SparkSqlSerializer].asInstanceOf[Class[Serializer]] - override def beforeAll(): Unit = { super.beforeAll() // To trigger the sort merge. - sql("set spark.sql.shuffle.partitions = 201") + val bypassMergeThreshold = + sparkContext.conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + sql(s"set spark.sql.shuffle.partitions=${bypassMergeThreshold + 1}") } } From 714db2ef52c0fe34418e252e5a6f220337022046 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 May 2015 22:32:13 -0700 Subject: [PATCH 41/48] [SPARK-7470] [SQL] Spark shell SQLContext crashes without hive This only happens if you have `SPARK_PREPEND_CLASSES` set. Then I built it with `build/sbt clean assembly compile` and just ran it with `bin/spark-shell`. ``` ... 15/05/07 17:07:30 INFO EventLoggingListener: Logging events to file:/tmp/spark-events/local-1431043649919 15/05/07 17:07:30 INFO SparkILoop: Created spark context.. Spark context available as sc. 
java.lang.NoClassDefFoundError: org/apache/hadoop/hive/conf/HiveConf
at java.lang.Class.getDeclaredConstructors0(Native Method)
at java.lang.Class.privateGetDeclaredConstructors(Class.java:2493)
at java.lang.Class.getConstructor0(Class.java:2803)
...
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hive.conf.HiveConf
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 52 more
:10: error: not found: value sqlContext
import sqlContext.implicits._
^
:10: error: not found: value sqlContext
import sqlContext.sql
^
```

yhuai marmbrus

Author: Andrew Or

Closes #5997 from andrewor14/sql-shell-crash and squashes the following commits:

61147e6 [Andrew Or] Also expect NoClassDefFoundError
---
 .../src/main/scala/org/apache/spark/repl/SparkILoop.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 8dc0e0c965923..488f3a9f33256 100644
--- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -1028,7 +1028,7 @@ class SparkILoop(
 logInfo("Created sql context (with Hive support)..")
 }
 catch {
- case cnf: java.lang.ClassNotFoundException =>
+ case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError =>
 sqlContext = new SQLContext(sparkContext)
 logInfo("Created sql context..")
 }

From f496bf3c539a873ffdf3aa803847ef7b50135bd7 Mon Sep 17 00:00:00 2001
From: wangfei
Date: Thu, 7 May 2015 22:55:42 -0700
Subject: [PATCH 42/48] [SPARK-7232] [SQL] Add a Substitution batch for spark sql analyzer

Added a new batch named `Substitution` before the `Resolution` batch. The motivation is that there are cases where we want to perform substitutions on the parsed logical plan before resolving it. Consider these two cases:

1 CTE: for a CTE we first build a raw logical plan
```
'With Map(q1 -> 'Subquery q1
  'Project ['key]
   'UnresolvedRelation [src], None)
 'Project [*]
  'Filter ('key = 5)
   'UnresolvedRelation [q1], None
```
In the `With` logical plan there is a map storing (`q1 -> subquery`); we first want to strip off the With command and substitute the `q1` in `UnresolvedRelation` with the subquery.

2 Another example is window functions: in a window function a user may define named windows, and we likewise need to substitute each window name in the child plan with its concrete window definition. This should also be done in the Substitution batch.
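For reference, the first plan above corresponds to a query like the following (an illustrative sketch; `sqlContext` is assumed to be an available SQLContext):

```
// Builds the 'With plan sketched above: q1 is recorded in the CTE map, and the
// outer 'UnresolvedRelation [q1] must be substituted before resolution.
sqlContext.sql("WITH q1 AS (SELECT key FROM src) SELECT * FROM q1 WHERE key = 5")
```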
Author: wangfei Closes #5776 from scwf/addbatch and squashes the following commits: d4b962f [wangfei] added WindowsSubstitution 70f6932 [wangfei] Merge branch 'master' of https://github.com/apache/spark into addbatch ecaeafb [wangfei] address yhuai's comments 553005a [wangfei] fix test case 0c54798 [wangfei] address comments 29aaaaf [wangfei] fix compile 1c9a092 [wangfei] added Substitution bastch --- .../sql/catalyst/analysis/Analyzer.scala | 98 ++++++++++++------- 1 file changed, 60 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7e46ad851cdd3..bb7913e186a85 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -55,6 +55,10 @@ class Analyzer( val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Nil lazy val batches: Seq[Batch] = Seq( + Batch("Substitution", fixedPoint, + CTESubstitution :: + WindowsSubstitution :: + Nil : _*), Batch("Resolution", fixedPoint, ResolveRelations :: ResolveReferences :: @@ -71,6 +75,55 @@ class Analyzer( extendedResolutionRules : _*) ) + /** + * Substitute child plan with cte definitions + */ + object CTESubstitution extends Rule[LogicalPlan] { + // TODO allow subquery to define CTE + def apply(plan: LogicalPlan): LogicalPlan = plan match { + case With(child, relations) => substituteCTE(child, relations) + case other => other + } + + def substituteCTE(plan: LogicalPlan, cteRelations: Map[String, LogicalPlan]): LogicalPlan = { + plan transform { + // In hive, if there is same table name in database and CTE definition, + // hive will use the table in database, not the CTE one. + // Taking into account the reasonableness and the implementation complexity, + // here use the CTE definition first, check table name only and ignore database name + // see https://github.com/apache/spark/pull/4929#discussion_r27186638 for more info + case u : UnresolvedRelation => + val substituted = cteRelations.get(u.tableIdentifier.last).map { relation => + val withAlias = u.alias.map(Subquery(_, relation)) + withAlias.getOrElse(relation) + } + substituted.getOrElse(u) + } + } + } + + /** + * Substitute child plan with WindowSpecDefinitions. + */ + object WindowsSubstitution extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + // Lookup WindowSpecDefinitions. This rule works with unresolved children. + case WithWindowDefinition(windowDefinitions, child) => + child.transform { + case plan => plan.transformExpressions { + case UnresolvedWindowExpression(c, WindowSpecReference(windowName)) => + val errorMessage = + s"Window specification $windowName is not defined in the WINDOW clause." + val windowSpecDefinition = + windowDefinitions + .get(windowName) + .getOrElse(failAnalysis(errorMessage)) + WindowExpression(c, windowSpecDefinition) + } + } + } + } + /** * Removes no-op Alias expressions from the plan. */ @@ -172,36 +225,20 @@ class Analyzer( * Replaces [[UnresolvedRelation]]s with concrete relations from the catalog. 
*/ object ResolveRelations extends Rule[LogicalPlan] { - def getTable(u: UnresolvedRelation, cteRelations: Map[String, LogicalPlan]): LogicalPlan = { + def getTable(u: UnresolvedRelation): LogicalPlan = { try { - // In hive, if there is same table name in database and CTE definition, - // hive will use the table in database, not the CTE one. - // Taking into account the reasonableness and the implementation complexity, - // here use the CTE definition first, check table name only and ignore database name - cteRelations.get(u.tableIdentifier.last) - .map(relation => u.alias.map(Subquery(_, relation)).getOrElse(relation)) - .getOrElse(catalog.lookupRelation(u.tableIdentifier, u.alias)) + catalog.lookupRelation(u.tableIdentifier, u.alias) } catch { case _: NoSuchTableException => u.failAnalysis(s"no such table ${u.tableName}") } } - def apply(plan: LogicalPlan): LogicalPlan = { - val (realPlan, cteRelations) = plan match { - // TODO allow subquery to define CTE - // Add cte table to a temp relation map,drop `with` plan and keep its child - case With(child, relations) => (child, relations) - case other => (other, Map.empty[String, LogicalPlan]) - } - - realPlan transform { - case i@InsertIntoTable(u: UnresolvedRelation, _, _, _, _) => - i.copy( - table = EliminateSubQueries(getTable(u, cteRelations))) - case u: UnresolvedRelation => - getTable(u, cteRelations) - } + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case i@InsertIntoTable(u: UnresolvedRelation, _, _, _, _) => + i.copy(table = EliminateSubQueries(getTable(u))) + case u: UnresolvedRelation => + getTable(u) } } @@ -664,21 +701,6 @@ class Analyzer( // We have to use transformDown at here to make sure the rule of // "Aggregate with Having clause" will be triggered. def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { - // Lookup WindowSpecDefinitions. This rule works with unresolved children. - case WithWindowDefinition(windowDefinitions, child) => - child.transform { - case plan => plan.transformExpressions { - case UnresolvedWindowExpression(c, WindowSpecReference(windowName)) => - val errorMessage = - s"Window specification $windowName is not defined in the WINDOW clause." - val windowSpecDefinition = - windowDefinitions - .get(windowName) - .getOrElse(failAnalysis(errorMessage)) - WindowExpression(c, windowSpecDefinition) - } - } - // Aggregate with Having clause. This rule works with an unresolved Aggregate because // a resolved Aggregate will not have Window Functions. 
case f @ Filter(condition, a @ Aggregate(groupingExprs, aggregateExprs, child)) From c2f0821aad3b82dcd327e914c9b297e92526649d Mon Sep 17 00:00:00 2001 From: "Zhang, Liye" Date: Fri, 8 May 2015 09:10:58 +0100 Subject: [PATCH 43/48] [SPARK-7392] [CORE] bugfix: Kryo buffer size cannot be larger than 2M Author: Zhang, Liye Closes #5934 from liyezhang556520/kryoBufSize and squashes the following commits: 5707e04 [Zhang, Liye] fix import order 8693288 [Zhang, Liye] replace multiplier with ByteUnit methods 9bf93e9 [Zhang, Liye] add tests d91e5ed [Zhang, Liye] change kb to mb --- .../spark/serializer/KryoSerializer.scala | 11 +++---- .../serializer/KryoSerializerSuite.scala | 30 +++++++++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index f9f78852f032b..64ba27f34d2f1 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -32,6 +32,7 @@ import org.apache.spark._ import org.apache.spark.api.python.PythonBroadcast import org.apache.spark.broadcast.HttpBroadcast import org.apache.spark.network.nio.{GetBlock, GotBlock, PutBlock} +import org.apache.spark.network.util.ByteUnit import org.apache.spark.scheduler.{CompressedMapStatus, HighlyCompressedMapStatus} import org.apache.spark.storage._ import org.apache.spark.util.BoundedPriorityQueue @@ -51,18 +52,18 @@ class KryoSerializer(conf: SparkConf) private val bufferSizeKb = conf.getSizeAsKb("spark.kryoserializer.buffer", "64k") - if (bufferSizeKb >= 2048) { + if (bufferSizeKb >= ByteUnit.GiB.toKiB(2)) { throw new IllegalArgumentException("spark.kryoserializer.buffer must be less than " + - s"2048 mb, got: + $bufferSizeKb mb.") + s"2048 mb, got: + ${ByteUnit.KiB.toMiB(bufferSizeKb)} mb.") } - private val bufferSize = (bufferSizeKb * 1024).toInt + private val bufferSize = ByteUnit.KiB.toBytes(bufferSizeKb).toInt val maxBufferSizeMb = conf.getSizeAsMb("spark.kryoserializer.buffer.max", "64m").toInt - if (maxBufferSizeMb >= 2048) { + if (maxBufferSizeMb >= ByteUnit.GiB.toMiB(2)) { throw new IllegalArgumentException("spark.kryoserializer.buffer.max must be less than " + s"2048 mb, got: + $maxBufferSizeMb mb.") } - private val maxBufferSize = maxBufferSizeMb * 1024 * 1024 + private val maxBufferSize = ByteUnit.MiB.toBytes(maxBufferSizeMb).toInt private val referenceTracking = conf.getBoolean("spark.kryo.referenceTracking", true) private val registrationRequired = conf.getBoolean("spark.kryo.registrationRequired", false) diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index 778a7eee73b23..c7369de24b81f 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -32,6 +32,36 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") conf.set("spark.kryo.registrator", classOf[MyRegistrator].getName) + test("configuration limits") { + val conf1 = conf.clone() + val kryoBufferProperty = "spark.kryoserializer.buffer" + val kryoBufferMaxProperty = "spark.kryoserializer.buffer.max" + conf1.set(kryoBufferProperty, "64k") + conf1.set(kryoBufferMaxProperty, "64m") + new 
KryoSerializer(conf1).newInstance()
+ // 2048m = 2097152k
+ conf1.set(kryoBufferProperty, "2097151k")
+ conf1.set(kryoBufferMaxProperty, "64m")
+ // should not throw exception when kryoBufferMaxProperty < kryoBufferProperty
+ new KryoSerializer(conf1).newInstance()
+ conf1.set(kryoBufferMaxProperty, "2097151k")
+ new KryoSerializer(conf1).newInstance()
+ val conf2 = conf.clone()
+ conf2.set(kryoBufferProperty, "2048m")
+ val thrown1 = intercept[IllegalArgumentException](new KryoSerializer(conf2).newInstance())
+ assert(thrown1.getMessage.contains(kryoBufferProperty))
+ val conf3 = conf.clone()
+ conf3.set(kryoBufferMaxProperty, "2048m")
+ val thrown2 = intercept[IllegalArgumentException](new KryoSerializer(conf3).newInstance())
+ assert(thrown2.getMessage.contains(kryoBufferMaxProperty))
+ val conf4 = conf.clone()
+ conf4.set(kryoBufferProperty, "2g")
+ conf4.set(kryoBufferMaxProperty, "3g")
+ val thrown3 = intercept[IllegalArgumentException](new KryoSerializer(conf4).newInstance())
+ assert(thrown3.getMessage.contains(kryoBufferProperty))
+ assert(!thrown3.getMessage.contains(kryoBufferMaxProperty))
+ }
+
 test("basic types") {
 val ser = new KryoSerializer(conf).newInstance()
 def check[T: ClassTag](t: T) {

From ebff7327af5efa9f57c605284de4fae6b050ae0f Mon Sep 17 00:00:00 2001
From: Lianhui Wang
Date: Fri, 8 May 2015 08:44:46 -0500
Subject: [PATCH 44/48] [SPARK-6869] [PYSPARK] Add pyspark archives path to PYTHONPATH

Based on https://github.com/apache/spark/pull/5478, which provides a PYSPARK_ARCHIVES_PATH env variable. With this PR, we only need to export PYSPARK_ARCHIVES_PATH=/user/spark/pyspark.zip,/user/spark/python/lib/py4j-0.8.2.1-src.zip in conf/spark-env.sh when PySpark is not installed on each node of the YARN cluster (see the snippet below). I ran Python applications successfully in both yarn-client and yarn-cluster mode with this PR. andrewor14 sryza Sephiroth-Lin Can you take a look at this? Thanks.
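Concretely, the workaround above is a single line in conf/spark-env.sh (paths exactly as in the description; adjust them to where the archives actually live):

```
export PYSPARK_ARCHIVES_PATH=/user/spark/pyspark.zip,/user/spark/python/lib/py4j-0.8.2.1-src.zip
```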
Author: Lianhui Wang Closes #5580 from lianhuiwang/SPARK-6869 and squashes the following commits: 66ffa43 [Lianhui Wang] Update Client.scala c2ad0f9 [Lianhui Wang] Update Client.scala 1c8f664 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 008850a [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 f0b4ed8 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 150907b [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 20402cd [Lianhui Wang] use ZipEntry 9d87c3f [Lianhui Wang] update scala style e7bd971 [Lianhui Wang] address vanzin's comments 4b8a3ed [Lianhui Wang] use pyArchivesEnvOpt e6b573b [Lianhui Wang] address vanzin's comments f11f84a [Lianhui Wang] zip pyspark archives 5192cca [Lianhui Wang] update import path 3b1e4c8 [Lianhui Wang] address tgravescs's comments 9396346 [Lianhui Wang] put zip to make-distribution.sh 0d2baf7 [Lianhui Wang] update import paths e0179be [Lianhui Wang] add zip pyspark archives in build or sparksubmit 31e8e06 [Lianhui Wang] update code style 9f31dac [Lianhui Wang] update code and add comments f72987c [Lianhui Wang] add archives path to PYTHONPATH --- assembly/pom.xml | 21 ++++++++++ .../org/apache/spark/deploy/SparkSubmit.scala | 41 +++++++++++++++++++ project/SparkBuild.scala | 37 ++++++++++++++++- .../org/apache/spark/deploy/yarn/Client.scala | 23 ++++++++--- 4 files changed, 114 insertions(+), 8 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 2b4d0a990bf22..626c8577e31fe 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -92,6 +92,27 @@ true + + + org.apache.maven.plugins + maven-antrun-plugin + + + package + + run + + + + + + + + + + + + org.apache.maven.plugins diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 8a0327984e195..329fa06ba8ba5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -332,6 +332,47 @@ object SparkSubmit { } } + // In yarn mode for a python app, add pyspark archives to files + // that can be distributed with the job + if (args.isPython && clusterManager == YARN) { + var pyArchives: String = null + val pyArchivesEnvOpt = sys.env.get("PYSPARK_ARCHIVES_PATH") + if (pyArchivesEnvOpt.isDefined) { + pyArchives = pyArchivesEnvOpt.get + } else { + if (!sys.env.contains("SPARK_HOME")) { + printErrorAndExit("SPARK_HOME does not exist for python application in yarn mode.") + } + val pythonPath = new ArrayBuffer[String] + for (sparkHome <- sys.env.get("SPARK_HOME")) { + val pyLibPath = Seq(sparkHome, "python", "lib").mkString(File.separator) + val pyArchivesFile = new File(pyLibPath, "pyspark.zip") + if (!pyArchivesFile.exists()) { + printErrorAndExit("pyspark.zip does not exist for python application in yarn mode.") + } + val py4jFile = new File(pyLibPath, "py4j-0.8.2.1-src.zip") + if (!py4jFile.exists()) { + printErrorAndExit("py4j-0.8.2.1-src.zip does not exist for python application " + + "in yarn mode.") + } + pythonPath += pyArchivesFile.getAbsolutePath() + pythonPath += py4jFile.getAbsolutePath() + } + pyArchives = pythonPath.mkString(",") + } + + pyArchives = pyArchives.split(",").map { localPath=> + val localURI = Utils.resolveURI(localPath) + if (localURI.getScheme != "local") { + args.files = mergeFileLists(args.files, localURI.toString) + new Path(localPath).getName + } else { + 
localURI.getPath + } + }.mkString(File.pathSeparator) + sysProps("spark.submit.pyArchives") = pyArchives + } + // If we're running a R app, set the main class to our specific R runner if (args.isR && deployMode == CLIENT) { if (args.primaryResource == SPARKR_SHELL) { diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 026855f8f6a5a..186345af0e60e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -370,6 +370,7 @@ object Assembly { object PySparkAssembly { import sbtassembly.Plugin._ import AssemblyKeys._ + import java.util.zip.{ZipOutputStream, ZipEntry} lazy val settings = Seq( unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" }, @@ -377,16 +378,48 @@ object PySparkAssembly { // to be included in the assembly. We can't just add "python/" to the assembly's resource dir // list since that will copy unneeded / unwanted files. resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File => + val src = new File(BuildCommons.sparkHome, "python/pyspark") + + val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip") + zipFile.delete() + zipRecursive(src, zipFile) + val dst = new File(outDir, "pyspark") if (!dst.isDirectory()) { require(dst.mkdirs()) } - - val src = new File(BuildCommons.sparkHome, "python/pyspark") copy(src, dst) } ) + private def zipRecursive(source: File, destZipFile: File) = { + val destOutput = new ZipOutputStream(new FileOutputStream(destZipFile)) + addFilesToZipStream("", source, destOutput) + destOutput.flush() + destOutput.close() + } + + private def addFilesToZipStream(parent: String, source: File, output: ZipOutputStream): Unit = { + if (source.isDirectory()) { + output.putNextEntry(new ZipEntry(parent + source.getName())) + for (file <- source.listFiles()) { + addFilesToZipStream(parent + source.getName() + File.separator, file, output) + } + } else { + val in = new FileInputStream(source) + output.putNextEntry(new ZipEntry(parent + source.getName())) + val buf = new Array[Byte](8192) + var n = 0 + while (n != -1) { + n = in.read(buf) + if (n != -1) { + output.write(buf, 0, n) + } + } + in.close() + } + } + private def copy(src: File, dst: File): Seq[File] = { src.listFiles().flatMap { f => val child = new File(dst, f.getName()) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 20ecaf092e3f8..d21a7393478ce 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -468,6 +468,17 @@ private[spark] class Client( env("SPARK_YARN_USER_ENV") = userEnvs } + // if spark.submit.pyArchives is in sparkConf, append pyArchives to PYTHONPATH + // that can be passed on to the ApplicationMaster and the executors. + if (sparkConf.contains("spark.submit.pyArchives")) { + var pythonPath = sparkConf.get("spark.submit.pyArchives") + if (env.contains("PYTHONPATH")) { + pythonPath = Seq(env.get("PYTHONPATH"), pythonPath).mkString(File.pathSeparator) + } + env("PYTHONPATH") = pythonPath + sparkConf.setExecutorEnv("PYTHONPATH", pythonPath) + } + // In cluster mode, if the deprecated SPARK_JAVA_OPTS is set, we need to propagate it to // executors. 
But we can't just set spark.executor.extraJavaOptions, because the driver's // SparkContext will not let that set spark* system properties, which is expected behavior for @@ -1074,7 +1085,7 @@ object Client extends Logging { val hiveConf = hiveClass.getMethod("getConf").invoke(hive) val hiveConfClass = mirror.classLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf") - val hiveConfGet = (param:String) => Option(hiveConfClass + val hiveConfGet = (param: String) => Option(hiveConfClass .getMethod("get", classOf[java.lang.String]) .invoke(hiveConf, param)) @@ -1096,7 +1107,7 @@ object Client extends Logging { val hive2Token = new Token[DelegationTokenIdentifier]() hive2Token.decodeFromUrlString(tokenStr) - credentials.addToken(new Text("hive.server2.delegation.token"),hive2Token) + credentials.addToken(new Text("hive.server2.delegation.token"), hive2Token) logDebug("Added hive.Server2.delegation.token to conf.") hiveClass.getMethod("closeCurrent").invoke(null) } else { @@ -1141,13 +1152,13 @@ object Client extends Logging { logInfo("Added HBase security token to credentials.") } catch { - case e:java.lang.NoSuchMethodException => + case e: java.lang.NoSuchMethodException => logInfo("HBase Method not found: " + e) - case e:java.lang.ClassNotFoundException => + case e: java.lang.ClassNotFoundException => logDebug("HBase Class not found: " + e) - case e:java.lang.NoClassDefFoundError => + case e: java.lang.NoClassDefFoundError => logDebug("HBase Class not found: " + e) - case e:Exception => + case e: Exception => logError("Exception when obtaining HBase security token: " + e) } } From c796be70f36e262b6a2ce75924bd970f40bf4045 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 8 May 2015 16:54:32 +0100 Subject: [PATCH 45/48] [SPARK-3454] separate json endpoints for data in the UI Exposes data available in the UI as json over http. Key points: * new endpoints, handled independently of existing XyzPage classes. Root entrypoint is `JsonRootResource` * Uses jersey + jackson for routing & converting POJOs into json * tests against known results in `HistoryServerSuite` * also fixes some minor issues w/ the UI -- synchronizing on access to `StorageListener` & `StorageStatusListener`, and fixing some inconsistencies w/ the way we handle retained jobs & stages. 
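As a quick illustration of the new API, the endpoints can be exercised with a few lines of Scala. This is a sketch under assumptions: the /api/v1 mount point and the history server's default port 18080 follow the monitoring.md additions in this patch, which are not reproduced here, so check that file for the authoritative routes.

    // Hypothetical smoke test: fetch the completed applications from a
    // running history server and print the raw JSON response. The URL is
    // an assumption, not taken from code shown in this patch.
    object JsonApiSmokeTest {
      def main(args: Array[String]): Unit = {
        val url = "http://localhost:18080/api/v1/applications?status=completed"
        println(scala.io.Source.fromURL(url).mkString)
      }
    }

The same resources are also served by the master UI and by live application UIs, since all of them attach the JsonRootResource servlet added below.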
Author: Imran Rashid Closes #5940 from squito/SPARK-3454_better_test_files and squashes the following commits: 1a72ed6 [Imran Rashid] rats 85fdb3e [Imran Rashid] Merge branch 'no_php' into SPARK-3454 1fc65b0 [Imran Rashid] Revert "Revert "[SPARK-3454] separate json endpoints for data in the UI"" 1276900 [Imran Rashid] get rid of giant event file, replace w/ smaller one; check both shuffle read & shuffle write 4e12013 [Imran Rashid] just use test case name for expectation file name 863ef64 [Imran Rashid] rename json files to avoid strange file names and not look like php --- .rat-excludes | 7 + core/pom.xml | 8 + .../org/apache/spark/JobExecutionStatus.java | 8 +- .../status/api/v1/ApplicationStatus.java | 30 + .../spark/status/api/v1/StageStatus.java | 31 + .../spark/status/api/v1/TaskSorting.java | 48 + .../java/org/apache/spark/util/EnumUtil.java | 38 + .../scala/org/apache/spark/SparkContext.scala | 2 +- .../history/ApplicationHistoryProvider.scala | 4 +- .../deploy/history/FsHistoryProvider.scala | 14 +- .../spark/deploy/history/HistoryServer.scala | 20 +- .../spark/deploy/master/ApplicationInfo.scala | 2 +- .../apache/spark/deploy/master/Master.scala | 14 +- .../deploy/master/ui/ApplicationPage.scala | 19 +- .../spark/deploy/master/ui/MasterPage.scala | 12 +- .../spark/deploy/master/ui/MasterWebUI.scala | 24 +- .../spark/status/api/v1/AllJobsResource.scala | 98 ++ .../spark/status/api/v1/AllRDDResource.scala | 104 ++ .../status/api/v1/AllStagesResource.scala | 309 ++++ .../api/v1/ApplicationListResource.scala | 94 ++ .../status/api/v1/ExecutorListResource.scala | 36 + .../status/api/v1/JacksonMessageWriter.scala | 93 ++ .../status/api/v1/JsonRootResource.scala | 255 +++ .../api/v1/OneApplicationResource.scala | 31 + .../spark/status/api/v1/OneJobResource.scala | 41 + .../spark/status/api/v1/OneRDDResource.scala | 34 + .../status/api/v1/OneStageResource.scala | 150 ++ .../spark/status/api/v1/SecurityFilter.scala | 38 + .../spark/status/api/v1/SimpleDateParam.scala | 55 + .../org/apache/spark/status/api/v1/api.scala | 228 +++ .../spark/storage/StorageStatusListener.scala | 6 +- .../scala/org/apache/spark/ui/SparkUI.scala | 49 +- .../scala/org/apache/spark/ui/WebUI.scala | 8 +- .../apache/spark/ui/exec/ExecutorsPage.scala | 17 +- .../apache/spark/ui/jobs/AllJobsPage.scala | 14 +- .../apache/spark/ui/jobs/AllStagesPage.scala | 12 +- .../org/apache/spark/ui/jobs/JobPage.scala | 2 +- .../spark/ui/jobs/JobProgressListener.scala | 4 + .../org/apache/spark/ui/jobs/PoolPage.scala | 2 +- .../org/apache/spark/ui/jobs/StagePage.scala | 19 +- .../org/apache/spark/ui/storage/RDDPage.scala | 73 +- .../apache/spark/ui/storage/StoragePage.scala | 2 +- .../apache/spark/ui/storage/StorageTab.scala | 6 +- .../application_list_json_expectation.json | 53 + .../complete_stage_list_json_expectation.json | 67 + .../completed_app_list_json_expectation.json | 53 + .../executor_list_json_expectation.json | 17 + .../failed_stage_list_json_expectation.json | 23 + ...multi_attempt_app_json_1__expectation.json | 15 + ...multi_attempt_app_json_2__expectation.json | 15 + .../job_list_json_expectation.json | 43 + .../maxDate2_app_list_json_expectation.json | 10 + .../maxDate_app_list_json_expectation.json | 19 + .../minDate_app_list_json_expectation.json | 35 + .../one_app_json_expectation.json | 10 + ...ne_app_multi_attempt_json_expectation.json | 17 + .../one_job_json_expectation.json | 15 + .../one_rdd_storage_json_expectation.json | 64 + .../one_stage_attempt_json_expectation.json | 270 ++++ 
.../one_stage_json_expectation.json | 270 ++++ .../rdd_list_storage_json_expectation.json | 9 + .../running_app_list_json_expectation.json | 1 + .../stage_list_json_expectation.json | 89 ++ ...ist_with_accumulable_json_expectation.json | 27 + .../stage_task_list_expectation.json | 561 +++++++ ...multi_attempt_app_json_1__expectation.json | 193 +++ ...multi_attempt_app_json_2__expectation.json | 193 +++ ...k_list_w__offset___length_expectation.json | 1401 +++++++++++++++++ ...stage_task_list_w__sortBy_expectation.json | 561 +++++++ ...tBy_short_names___runtime_expectation.json | 561 +++++++ ...rtBy_short_names__runtime_expectation.json | 561 +++++++ ...mmary_w__custom_quantiles_expectation.json | 19 + ...sk_summary_w_shuffle_read_expectation.json | 19 + ...k_summary_w_shuffle_write_expectation.json | 19 + ...age_with_accumulable_json_expectation.json | 242 +++ ...eded_failed_job_list_json_expectation.json | 43 + .../succeeded_job_list_json_expectation.json | 29 + .../local-1422981759269/APPLICATION_COMPLETE | 0 .../local-1422981759269/EVENT_LOG_1 | 88 ++ .../local-1422981759269/SPARK_VERSION_1.2.0 | 0 .../local-1422981780767/APPLICATION_COMPLETE | 0 .../local-1422981780767/EVENT_LOG_1 | 82 + .../local-1422981780767/SPARK_VERSION_1.2.0 | 0 .../local-1425081759269/APPLICATION_COMPLETE | 0 .../local-1425081759269/EVENT_LOG_1 | 88 ++ .../local-1425081759269/SPARK_VERSION_1.2.0 | 0 .../local-1426533911241/APPLICATION_COMPLETE | 0 .../local-1426533911241/EVENT_LOG_1 | 24 + .../local-1426533911241/SPARK_VERSION_1.2.0 | 0 .../local-1426633911242/APPLICATION_COMPLETE | 0 .../local-1426633911242/EVENT_LOG_1 | 24 + .../local-1426633911242/SPARK_VERSION_1.2.0 | 0 .../spark-events/local-1430917381534 | 231 +++ .../org/apache/spark/JsonTestUtils.scala | 34 + .../spark/deploy/JsonProtocolSuite.scala | 14 +- .../deploy/history/HistoryServerSuite.scala | 231 ++- .../status/api/v1/SimpleDateParamTest.scala | 29 + .../org/apache/spark/ui/UISeleniumSuite.scala | 264 +++- docs/monitoring.md | 74 + pom.xml | 12 + 100 files changed, 8608 insertions(+), 172 deletions(-) create mode 100644 core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java create mode 100644 core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java create mode 100644 core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java create mode 100644 core/src/main/java/org/apache/spark/util/EnumUtil.java create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala 
create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/api.scala create mode 100644 core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json create mode 100644 
core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json
 create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json
 create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json
 create mode 100644 core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json
 create mode 100644 core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json
 create mode 100755 core/src/test/resources/spark-events/local-1422981759269/APPLICATION_COMPLETE
 create mode 100755 core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1
 create mode 100755 core/src/test/resources/spark-events/local-1422981759269/SPARK_VERSION_1.2.0
 create mode 100755 core/src/test/resources/spark-events/local-1422981780767/APPLICATION_COMPLETE
 create mode 100755 core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1
 create mode 100755 core/src/test/resources/spark-events/local-1422981780767/SPARK_VERSION_1.2.0
 create mode 100755 core/src/test/resources/spark-events/local-1425081759269/APPLICATION_COMPLETE
 create mode 100755 core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1
 create mode 100755 core/src/test/resources/spark-events/local-1425081759269/SPARK_VERSION_1.2.0
 create mode 100755 core/src/test/resources/spark-events/local-1426533911241/APPLICATION_COMPLETE
 create mode 100755 core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1
 create mode 100755 core/src/test/resources/spark-events/local-1426533911241/SPARK_VERSION_1.2.0
 create mode 100755 core/src/test/resources/spark-events/local-1426633911242/APPLICATION_COMPLETE
 create mode 100755 core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1
 create mode 100755 core/src/test/resources/spark-events/local-1426633911242/SPARK_VERSION_1.2.0
 create mode 100755 core/src/test/resources/spark-events/local-1430917381534
 create mode 100644 core/src/test/scala/org/apache/spark/JsonTestUtils.scala
 create mode 100644 core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamTest.scala

diff --git a/.rat-excludes b/.rat-excludes
index dccf2db8055ce..dc08c4a3673d2 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -74,5 +74,12 @@ logs
 .*scalastyle-output.xml
 .*dependency-reduced-pom.xml
 known_translations
+json_expectation
+local-1422981759269/*
+local-1422981780767/*
+local-1425081759269/*
+local-1426533911241/*
+local-1426633911242/*
+local-1430917381534/*
 DESCRIPTION
 NAMESPACE
diff --git a/core/pom.xml b/core/pom.xml
index 164a836cb8f0b..fc42f48973fe9 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -228,6 +228,14 @@
       <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
       <version>3.2.10</version>
     </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-server</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-core</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.mesos</groupId>
       <artifactId>mesos</artifactId>
diff --git a/core/src/main/java/org/apache/spark/JobExecutionStatus.java b/core/src/main/java/org/apache/spark/JobExecutionStatus.java
index 6e161313702bb..0287fb79f8dd2 100644
--- a/core/src/main/java/org/apache/spark/JobExecutionStatus.java
+++ b/core/src/main/java/org/apache/spark/JobExecutionStatus.java
@@ -17,9 +17,15 @@
 
 package org.apache.spark;
 
+import org.apache.spark.util.EnumUtil;
+
 public enum JobExecutionStatus {
   RUNNING,
   SUCCEEDED,
   FAILED,
-  UNKNOWN
+  UNKNOWN;
+
+  public static JobExecutionStatus fromString(String str) {
+    return EnumUtil.parseIgnoreCase(JobExecutionStatus.class, str);
+  }
 }
diff --git
a/core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java b/core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java new file mode 100644 index 0000000000000..8c7dcf776fda8 --- /dev/null +++ b/core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.status.api.v1; + +import org.apache.spark.util.EnumUtil; + +public enum ApplicationStatus { + COMPLETED, + RUNNING; + + public static ApplicationStatus fromString(String str) { + return EnumUtil.parseIgnoreCase(ApplicationStatus.class, str); + } + +} diff --git a/core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java b/core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java new file mode 100644 index 0000000000000..9dbb565aab707 --- /dev/null +++ b/core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.status.api.v1; + +import org.apache.spark.util.EnumUtil; + +public enum StageStatus { + ACTIVE, + COMPLETE, + FAILED, + PENDING; + + public static StageStatus fromString(String str) { + return EnumUtil.parseIgnoreCase(StageStatus.class, str); + } +} diff --git a/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java b/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java new file mode 100644 index 0000000000000..f19ed01d5aebf --- /dev/null +++ b/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.status.api.v1;
+
+import org.apache.spark.util.EnumUtil;
+
+import java.util.HashSet;
+import java.util.Set;
+
+public enum TaskSorting {
+  ID,
+  INCREASING_RUNTIME("runtime"),
+  DECREASING_RUNTIME("-runtime");
+
+  private final Set<String> alternateNames;
+  private TaskSorting(String... names) {
+    alternateNames = new HashSet<String>();
+    for (String n: names) {
+      alternateNames.add(n);
+    }
+  }
+
+  public static TaskSorting fromString(String str) {
+    String lower = str.toLowerCase();
+    for (TaskSorting t: values()) {
+      if (t.alternateNames.contains(lower)) {
+        return t;
+      }
+    }
+    return EnumUtil.parseIgnoreCase(TaskSorting.class, str);
+  }
+
+}
diff --git a/core/src/main/java/org/apache/spark/util/EnumUtil.java b/core/src/main/java/org/apache/spark/util/EnumUtil.java
new file mode 100644
index 0000000000000..c40c7e727613c
--- /dev/null
+++ b/core/src/main/java/org/apache/spark/util/EnumUtil.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.util;
+
+import com.google.common.base.Joiner;
+import org.apache.spark.annotation.Private;
+
+@Private
+public class EnumUtil {
+  public static <E extends Enum<E>> E parseIgnoreCase(Class<E> clz, String str) {
+    E[] constants = clz.getEnumConstants();
+    if (str == null) {
+      return null;
+    }
+    for (E e : constants) {
+      if (e.name().equalsIgnoreCase(str)) {
+        return e;
+      }
+    }
+    throw new IllegalArgumentException(
+      String.format("Illegal type='%s'.
Supported type values: %s", + str, Joiner.on(", ").join(constants))); + } +} diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index b5f040ceb15ca..b59f562d05ead 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -430,7 +430,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli _ui = if (conf.getBoolean("spark.ui.enabled", true)) { Some(SparkUI.createLiveUI(this, _conf, listenerBus, _jobProgressListener, - _env.securityManager,appName)) + _env.securityManager,appName, startTime = startTime)) } else { // For tests, do not enable the UI None diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index 6a5011af17458..298a8201960d1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.history import org.apache.spark.ui.SparkUI -private[history] case class ApplicationAttemptInfo( +private[spark] case class ApplicationAttemptInfo( attemptId: Option[String], startTime: Long, endTime: Long, @@ -27,7 +27,7 @@ private[history] case class ApplicationAttemptInfo( sparkUser: String, completed: Boolean = false) -private[history] case class ApplicationHistoryInfo( +private[spark] case class ApplicationHistoryInfo( id: String, name: String, attempts: List[ApplicationAttemptInfo]) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 993763f3aa092..45c2be34c8680 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -17,23 +17,21 @@ package org.apache.spark.deploy.history -import java.io.{IOException, BufferedInputStream, FileNotFoundException, InputStream} +import java.io.{BufferedInputStream, FileNotFoundException, IOException, InputStream} import java.util.concurrent.{ExecutorService, Executors, TimeUnit} import scala.collection.mutable -import scala.concurrent.duration.Duration -import com.google.common.util.concurrent.ThreadFactoryBuilder - -import com.google.common.util.concurrent.MoreExecutors -import org.apache.hadoop.fs.permission.AccessControlException +import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder} import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.fs.permission.AccessControlException + +import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.io.CompressionCodec import org.apache.spark.scheduler._ import org.apache.spark.ui.SparkUI import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} -import org.apache.spark.{Logging, SecurityManager, SparkConf} /** * A class that provides application history from event logs stored in the file system. 
@@ -151,7 +149,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val conf = this.conf.clone() val appSecManager = new SecurityManager(conf) SparkUI.createHistoryUI(conf, replayBus, appSecManager, appId, - HistoryServer.getAttemptURI(appId, attempt.attemptId)) + HistoryServer.getAttemptURI(appId, attempt.attemptId), attempt.startTime) // Do not call ui.bind() to avoid creating a new server for each application } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 754c8e9b6668b..50522e69dc519 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -25,6 +25,7 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.status.api.v1.{ApplicationInfo, ApplicationsListResource, JsonRootResource, UIRoot} import org.apache.spark.ui.{SparkUI, UIUtils, WebUI} import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.{SignalLogger, Utils} @@ -45,7 +46,7 @@ class HistoryServer( provider: ApplicationHistoryProvider, securityManager: SecurityManager, port: Int) - extends WebUI(securityManager, port, conf) with Logging { + extends WebUI(securityManager, port, conf) with Logging with UIRoot { // How many applications to retain private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50) @@ -56,7 +57,7 @@ class HistoryServer( require(parts.length == 1 || parts.length == 2, s"Invalid app key $key") val ui = provider .getAppUI(parts(0), if (parts.length > 1) Some(parts(1)) else None) - .getOrElse(throw new NoSuchElementException()) + .getOrElse(throw new NoSuchElementException(s"no app with key $key")) attachSparkUI(ui) ui } @@ -113,6 +114,10 @@ class HistoryServer( } } + def getSparkUI(appKey: String): Option[SparkUI] = { + Option(appCache.get(appKey)) + } + initialize() /** @@ -123,6 +128,9 @@ class HistoryServer( */ def initialize() { attachPage(new HistoryPage(this)) + + attachHandler(JsonRootResource.getJsonServlet(this)) + attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static")) val contextHandler = new ServletContextHandler @@ -160,7 +168,13 @@ class HistoryServer( * * @return List of all known applications. */ - def getApplicationList(): Iterable[ApplicationHistoryInfo] = provider.getListing() + def getApplicationList(): Iterable[ApplicationHistoryInfo] = { + provider.getListing() + } + + def getApplicationInfoList: Iterator[ApplicationInfo] = { + getApplicationList().iterator.map(ApplicationsListResource.appHistoryInfoToPublicAppInfo) + } /** * Returns the provider configuration to show in the listing page. 
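HistoryServer above and MasterWebUI further below are the two UIRoot implementations shown in this section, both wired into JsonRootResource. Inferred from those implementations only, the contract looks roughly like the sketch below; the authoritative definition lives in JsonRootResource.scala, which is not shown here, and may carry extra members or defaults.

    import org.apache.spark.status.api.v1.ApplicationInfo
    import org.apache.spark.ui.SparkUI

    // Approximate shape of UIRoot, pieced together from its implementors
    // in this patch; illustrative only.
    trait UIRoot {
      def getSparkUI(appKey: String): Option[SparkUI]
      def getApplicationInfoList: Iterator[ApplicationInfo]
    }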
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala index f59d550d4f3b3..1620e95bea218 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala @@ -28,7 +28,7 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.ApplicationDescription import org.apache.spark.util.Utils -private[deploy] class ApplicationInfo( +private[spark] class ApplicationInfo( val startTime: Long, val id: String, val desc: ApplicationDescription, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 0fac3cdcf55e7..53e1903a3d125 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -754,9 +754,9 @@ private[master] class Master( /** * Rebuild a new SparkUI from the given application's event logs. - * Return whether this is successful. + * Return the UI if successful, else None */ - private def rebuildSparkUI(app: ApplicationInfo): Boolean = { + private[master] def rebuildSparkUI(app: ApplicationInfo): Option[SparkUI] = { val appName = app.desc.name val notFoundBasePath = HistoryServer.UI_PATH_PREFIX + "/not-found" try { @@ -764,7 +764,7 @@ private[master] class Master( .getOrElse { // Event logging is not enabled for this application app.desc.appUiUrl = notFoundBasePath - return false + return None } val eventLogFilePrefix = EventLoggingListener.getLogPath( @@ -787,7 +787,7 @@ private[master] class Master( val logInput = EventLoggingListener.openEventLog(new Path(eventLogFile), fs) val replayBus = new ReplayListenerBus() val ui = SparkUI.createHistoryUI(new SparkConf, replayBus, new SecurityManager(conf), - appName + status, HistoryServer.UI_PATH_PREFIX + s"/${app.id}") + appName + status, HistoryServer.UI_PATH_PREFIX + s"/${app.id}", app.startTime) val maybeTruncated = eventLogFile.endsWith(EventLoggingListener.IN_PROGRESS) try { replayBus.replay(logInput, eventLogFile, maybeTruncated) @@ -798,7 +798,7 @@ private[master] class Master( webUi.attachSparkUI(ui) // Application UI is successfully rebuilt, so link the Master UI to it app.desc.appUiUrl = ui.basePath - true + Some(ui) } catch { case fnf: FileNotFoundException => // Event logging is enabled for this application, but no event logs are found @@ -808,7 +808,7 @@ private[master] class Master( msg += " Did you specify the correct logging directory?" 
msg = URLEncoder.encode(msg, "UTF-8") app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&title=$title" - false + None case e: Exception => // Relay exception message to application UI page val title = s"Application history load error (${app.id})" @@ -817,7 +817,7 @@ private[master] class Master( logError(msg, e) msg = URLEncoder.encode(msg, "UTF-8") app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&exception=$exception&title=$title" - false + None } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 273f077bd8f57..06e265f99e231 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -23,10 +23,8 @@ import scala.concurrent.Await import scala.xml.Node import akka.pattern.ask -import org.json4s.JValue -import org.json4s.JsonAST.JNothing -import org.apache.spark.deploy.{ExecutorState, JsonProtocol} +import org.apache.spark.deploy.ExecutorState import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.ExecutorDesc import org.apache.spark.ui.{UIUtils, WebUIPage} @@ -37,21 +35,6 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") private val master = parent.masterActorRef private val timeout = parent.timeout - /** Executor details for a particular application */ - override def renderJson(request: HttpServletRequest): JValue = { - val appId = request.getParameter("appId") - val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, timeout) - val app = state.activeApps.find(_.id == appId).getOrElse({ - state.completedApps.find(_.id == appId).getOrElse(null) - }) - if (app == null) { - JNothing - } else { - JsonProtocol.writeApplicationInfo(app) - } - } - /** Executor details for a particular application */ def render(request: HttpServletRequest): Seq[Node] = { val appId = request.getParameter("appId") diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index 1f2c3fdbfb2bc..756927682cd24 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -35,10 +35,13 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { private val master = parent.masterActorRef private val timeout = parent.timeout - override def renderJson(request: HttpServletRequest): JValue = { + def getMasterState: MasterStateResponse = { val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, timeout) - JsonProtocol.writeMasterState(state) + Await.result(stateFuture, timeout) + } + + override def renderJson(request: HttpServletRequest): JValue = { + JsonProtocol.writeMasterState(getMasterState) } def handleAppKillRequest(request: HttpServletRequest): Unit = { @@ -68,8 +71,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { /** Index view listing applications and executors */ def render(request: HttpServletRequest): Seq[Node] = { - val stateFuture = (master ? 
RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, timeout) + val state = getMasterState val workerHeaders = Seq("Worker Id", "Address", "State", "Cores", "Memory") val workers = state.workers.sortBy(_.id) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index dea0a65eeeaa6..eb26e9f99c70b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.master.ui import org.apache.spark.Logging import org.apache.spark.deploy.master.Master +import org.apache.spark.status.api.v1.{ApplicationsListResource, ApplicationInfo, JsonRootResource, UIRoot} import org.apache.spark.ui.{SparkUI, WebUI} import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.RpcUtils @@ -28,12 +29,15 @@ import org.apache.spark.util.RpcUtils */ private[master] class MasterWebUI(val master: Master, requestedPort: Int) - extends WebUI(master.securityMgr, requestedPort, master.conf, name = "MasterUI") with Logging { + extends WebUI(master.securityMgr, requestedPort, master.conf, name = "MasterUI") with Logging + with UIRoot { val masterActorRef = master.self val timeout = RpcUtils.askTimeout(master.conf) val killEnabled = master.conf.getBoolean("spark.ui.killEnabled", true) + val masterPage = new MasterPage(this) + initialize() /** Initialize all components of the server. */ @@ -43,6 +47,7 @@ class MasterWebUI(val master: Master, requestedPort: Int) attachPage(new HistoryNotFoundPage(this)) attachPage(masterPage) attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static")) + attachHandler(JsonRootResource.getJsonServlet(this)) attachHandler(createRedirectHandler( "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST"))) attachHandler(createRedirectHandler( @@ -60,6 +65,23 @@ class MasterWebUI(val master: Master, requestedPort: Int) assert(serverInfo.isDefined, "Master UI must be bound to a server before detaching SparkUIs") ui.getHandlers.foreach(detachHandler) } + + def getApplicationInfoList: Iterator[ApplicationInfo] = { + val state = masterPage.getMasterState + val activeApps = state.activeApps.sortBy(_.startTime).reverse + val completedApps = state.completedApps.sortBy(_.endTime).reverse + activeApps.iterator.map { ApplicationsListResource.convertApplicationInfo(_, false) } ++ + completedApps.iterator.map { ApplicationsListResource.convertApplicationInfo(_, true) } + } + + def getSparkUI(appId: String): Option[SparkUI] = { + val state = masterPage.getMasterState + val activeApps = state.activeApps.sortBy(_.startTime).reverse + val completedApps = state.completedApps.sortBy(_.endTime).reverse + (activeApps ++ completedApps).find { _.id == appId }.flatMap { + master.rebuildSparkUI + } + } } private[master] object MasterWebUI { diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala new file mode 100644 index 0000000000000..5783df5d8220c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.util.{Arrays, Date, List => JList} +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.apache.spark.JobExecutionStatus +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.JobProgressListener +import org.apache.spark.ui.jobs.UIData.JobUIData + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class AllJobsResource(ui: SparkUI) { + + @GET + def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = { + val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] = + AllJobsResource.getStatusToJobs(ui) + val adjStatuses: JList[JobExecutionStatus] = { + if (statuses.isEmpty) { + Arrays.asList(JobExecutionStatus.values(): _*) + } else { + statuses + } + } + val jobInfos = for { + (status, jobs) <- statusToJobs + job <- jobs if adjStatuses.contains(status) + } yield { + AllJobsResource.convertJobData(job, ui.jobProgressListener, false) + } + jobInfos.sortBy{- _.jobId} + } + +} + +private[v1] object AllJobsResource { + + def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = { + val statusToJobs = ui.jobProgressListener.synchronized { + Seq( + JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq, + JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq, + JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq + ) + } + statusToJobs + } + + def convertJobData( + job: JobUIData, + listener: JobProgressListener, + includeStageDetails: Boolean): JobData = { + listener.synchronized { + val lastStageInfo = listener.stageIdToInfo.get(job.stageIds.max) + val lastStageData = lastStageInfo.flatMap { s => + listener.stageIdToData.get((s.stageId, s.attemptId)) + } + val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)") + val lastStageDescription = lastStageData.flatMap { _.description } + new JobData( + jobId = job.jobId, + name = lastStageName, + description = lastStageDescription, + submissionTime = job.submissionTime.map{new Date(_)}, + completionTime = job.completionTime.map{new Date(_)}, + stageIds = job.stageIds, + jobGroup = job.jobGroup, + status = job.status, + numTasks = job.numTasks, + numActiveTasks = job.numActiveTasks, + numCompletedTasks = job.numCompletedTasks, + numSkippedTasks = job.numCompletedTasks, + numFailedTasks = job.numFailedTasks, + numActiveStages = job.numActiveStages, + numCompletedStages = job.completedStageIndices.size, + numSkippedStages = job.numSkippedStages, + numFailedStages = job.numFailedStages + ) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala new file mode 100644 index 0000000000000..645ede26a0879 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala @@ -0,0 +1,104 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{GET, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.storage.{RDDInfo, StorageStatus, StorageUtils} +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.storage.StorageListener + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class AllRDDResource(ui: SparkUI) { + + @GET + def rddList(): Seq[RDDStorageInfo] = { + val storageStatusList = ui.storageListener.storageStatusList + val rddInfos = ui.storageListener.rddInfoList + rddInfos.map{rddInfo => + AllRDDResource.getRDDStorageInfo(rddInfo.id, rddInfo, storageStatusList, + includeDetails = false) + } + } + +} + +private[spark] object AllRDDResource { + + def getRDDStorageInfo( + rddId: Int, + listener: StorageListener, + includeDetails: Boolean): Option[RDDStorageInfo] = { + val storageStatusList = listener.storageStatusList + listener.rddInfoList.find { _.id == rddId }.map { rddInfo => + getRDDStorageInfo(rddId, rddInfo, storageStatusList, includeDetails) + } + } + + def getRDDStorageInfo( + rddId: Int, + rddInfo: RDDInfo, + storageStatusList: Seq[StorageStatus], + includeDetails: Boolean): RDDStorageInfo = { + val workers = storageStatusList.map { (rddId, _) } + val blockLocations = StorageUtils.getRddBlockLocations(rddId, storageStatusList) + val blocks = storageStatusList + .flatMap { _.rddBlocksById(rddId) } + .sortWith { _._1.name < _._1.name } + .map { case (blockId, status) => + (blockId, status, blockLocations.get(blockId).getOrElse(Seq[String]("Unknown"))) + } + + val dataDistribution = if (includeDetails) { + Some(storageStatusList.map { status => + new RDDDataDistribution( + address = status.blockManagerId.hostPort, + memoryUsed = status.memUsedByRdd(rddId), + memoryRemaining = status.memRemaining, + diskUsed = status.diskUsedByRdd(rddId) + ) } ) + } else { + None + } + val partitions = if (includeDetails) { + Some(blocks.map { case (id, block, locations) => + new RDDPartitionInfo( + blockName = id.name, + storageLevel = block.storageLevel.description, + memoryUsed = block.memSize, + diskUsed = block.diskSize, + executors = locations + ) + } ) + } else { + None + } + + new RDDStorageInfo( + id = rddId, + name = rddInfo.name, + numPartitions = rddInfo.numPartitions, + numCachedPartitions = rddInfo.numCachedPartitions, + storageLevel = rddInfo.storageLevel.description, + memoryUsed = rddInfo.memSize, + diskUsed = rddInfo.diskSize, + dataDistribution = dataDistribution, + partitions = partitions + ) + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala new file mode 100644 index 0000000000000..50608588f09ae --- /dev/null +++ 
b/core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.util.{Arrays, Date, List => JList} +import javax.ws.rs.{GET, PathParam, Produces, QueryParam} +import javax.ws.rs.core.MediaType + +import org.apache.spark.executor.{InputMetrics => InternalInputMetrics, OutputMetrics => InternalOutputMetrics, ShuffleReadMetrics => InternalShuffleReadMetrics, ShuffleWriteMetrics => InternalShuffleWriteMetrics, TaskMetrics => InternalTaskMetrics} +import org.apache.spark.scheduler.{AccumulableInfo => InternalAccumulableInfo, StageInfo} +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.UIData.{StageUIData, TaskUIData} +import org.apache.spark.util.Distribution + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class AllStagesResource(ui: SparkUI) { + + @GET + def stageList(@QueryParam("status") statuses: JList[StageStatus]): Seq[StageData] = { + val listener = ui.jobProgressListener + val stageAndStatus = AllStagesResource.stagesAndStatus(ui) + val adjStatuses = { + if (statuses.isEmpty()) { + Arrays.asList(StageStatus.values(): _*) + } else { + statuses + } + } + for { + (status, stageList) <- stageAndStatus + stageInfo: StageInfo <- stageList if adjStatuses.contains(status) + stageUiData: StageUIData <- listener.synchronized { + listener.stageIdToData.get((stageInfo.stageId, stageInfo.attemptId)) + } + } yield { + AllStagesResource.stageUiToStageData(status, stageInfo, stageUiData, includeDetails = false) + } + } +} + +private[v1] object AllStagesResource { + def stageUiToStageData( + status: StageStatus, + stageInfo: StageInfo, + stageUiData: StageUIData, + includeDetails: Boolean): StageData = { + + val taskData = if (includeDetails) { + Some(stageUiData.taskData.map { case (k, v) => k -> convertTaskData(v) } ) + } else { + None + } + val executorSummary = if (includeDetails) { + Some(stageUiData.executorSummary.map { case (k, summary) => + k -> new ExecutorStageSummary( + taskTime = summary.taskTime, + failedTasks = summary.failedTasks, + succeededTasks = summary.succeededTasks, + inputBytes = summary.inputBytes, + outputBytes = summary.outputBytes, + shuffleRead = summary.shuffleRead, + shuffleWrite = summary.shuffleWrite, + memoryBytesSpilled = summary.memoryBytesSpilled, + diskBytesSpilled = summary.diskBytesSpilled + ) + }) + } else { + None + } + + val accumulableInfo = stageUiData.accumulables.values.map { convertAccumulableInfo }.toSeq + + new StageData( + status = status, + stageId = stageInfo.stageId, + attemptId = stageInfo.attemptId, + numActiveTasks = stageUiData.numActiveTasks, + numCompleteTasks = stageUiData.numCompleteTasks, + numFailedTasks = 
stageUiData.numFailedTasks, + executorRunTime = stageUiData.executorRunTime, + inputBytes = stageUiData.inputBytes, + inputRecords = stageUiData.inputRecords, + outputBytes = stageUiData.outputBytes, + outputRecords = stageUiData.outputRecords, + shuffleReadBytes = stageUiData.shuffleReadTotalBytes, + shuffleReadRecords = stageUiData.shuffleReadRecords, + shuffleWriteBytes = stageUiData.shuffleWriteBytes, + shuffleWriteRecords = stageUiData.shuffleWriteRecords, + memoryBytesSpilled = stageUiData.memoryBytesSpilled, + diskBytesSpilled = stageUiData.diskBytesSpilled, + schedulingPool = stageUiData.schedulingPool, + name = stageInfo.name, + details = stageInfo.details, + accumulatorUpdates = accumulableInfo, + tasks = taskData, + executorSummary = executorSummary + ) + } + + def stagesAndStatus(ui: SparkUI): Seq[(StageStatus, Seq[StageInfo])] = { + val listener = ui.jobProgressListener + listener.synchronized { + Seq( + StageStatus.ACTIVE -> listener.activeStages.values.toSeq, + StageStatus.COMPLETE -> listener.completedStages.reverse.toSeq, + StageStatus.FAILED -> listener.failedStages.reverse.toSeq, + StageStatus.PENDING -> listener.pendingStages.values.toSeq + ) + } + } + + def convertTaskData(uiData: TaskUIData): TaskData = { + new TaskData( + taskId = uiData.taskInfo.taskId, + index = uiData.taskInfo.index, + attempt = uiData.taskInfo.attempt, + launchTime = new Date(uiData.taskInfo.launchTime), + executorId = uiData.taskInfo.executorId, + host = uiData.taskInfo.host, + taskLocality = uiData.taskInfo.taskLocality.toString(), + speculative = uiData.taskInfo.speculative, + accumulatorUpdates = uiData.taskInfo.accumulables.map { convertAccumulableInfo }, + errorMessage = uiData.errorMessage, + taskMetrics = uiData.taskMetrics.map { convertUiTaskMetrics } + ) + } + + def taskMetricDistributions( + allTaskData: Iterable[TaskUIData], + quantiles: Array[Double]): TaskMetricDistributions = { + + val rawMetrics = allTaskData.flatMap{_.taskMetrics}.toSeq + + def metricQuantiles(f: InternalTaskMetrics => Double): IndexedSeq[Double] = + Distribution(rawMetrics.map { d => f(d) }).get.getQuantiles(quantiles) + + // We need to do a lot of similar munging to nested metrics here. For each one, + // we want (a) extract the values for nested metrics (b) make a distribution for each metric + // (c) shove the distribution into the right field in our return type and (d) only return + // a result if the option is defined for any of the tasks. MetricHelper is a little util + // to make it a little easier to deal w/ all of the nested options. Mostly it lets us just + // implement one "build" method, which just builds the quantiles for each field. 
+ + val inputMetrics: Option[InputMetricDistributions] = + new MetricHelper[InternalInputMetrics, InputMetricDistributions](rawMetrics, quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalInputMetrics] = { + raw.inputMetrics + } + + def build: InputMetricDistributions = new InputMetricDistributions( + bytesRead = submetricQuantiles(_.bytesRead), + recordsRead = submetricQuantiles(_.recordsRead) + ) + }.metricOption + + val outputMetrics: Option[OutputMetricDistributions] = + new MetricHelper[InternalOutputMetrics, OutputMetricDistributions](rawMetrics, quantiles) { + def getSubmetrics(raw:InternalTaskMetrics): Option[InternalOutputMetrics] = { + raw.outputMetrics + } + def build: OutputMetricDistributions = new OutputMetricDistributions( + bytesWritten = submetricQuantiles(_.bytesWritten), + recordsWritten = submetricQuantiles(_.recordsWritten) + ) + }.metricOption + + val shuffleReadMetrics: Option[ShuffleReadMetricDistributions] = + new MetricHelper[InternalShuffleReadMetrics, ShuffleReadMetricDistributions](rawMetrics, + quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalShuffleReadMetrics] = { + raw.shuffleReadMetrics + } + def build: ShuffleReadMetricDistributions = new ShuffleReadMetricDistributions( + readBytes = submetricQuantiles(_.totalBytesRead), + readRecords = submetricQuantiles(_.recordsRead), + remoteBytesRead = submetricQuantiles(_.remoteBytesRead), + remoteBlocksFetched = submetricQuantiles(_.remoteBlocksFetched), + localBlocksFetched = submetricQuantiles(_.localBlocksFetched), + totalBlocksFetched = submetricQuantiles(_.totalBlocksFetched), + fetchWaitTime = submetricQuantiles(_.fetchWaitTime) + ) + }.metricOption + + val shuffleWriteMetrics: Option[ShuffleWriteMetricDistributions] = + new MetricHelper[InternalShuffleWriteMetrics, ShuffleWriteMetricDistributions](rawMetrics, + quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalShuffleWriteMetrics] = { + raw.shuffleWriteMetrics + } + def build: ShuffleWriteMetricDistributions = new ShuffleWriteMetricDistributions( + writeBytes = submetricQuantiles(_.shuffleBytesWritten), + writeRecords = submetricQuantiles(_.shuffleRecordsWritten), + writeTime = submetricQuantiles(_.shuffleWriteTime) + ) + }.metricOption + + new TaskMetricDistributions( + quantiles = quantiles, + executorDeserializeTime = metricQuantiles(_.executorDeserializeTime), + executorRunTime = metricQuantiles(_.executorRunTime), + resultSize = metricQuantiles(_.resultSize), + jvmGcTime = metricQuantiles(_.jvmGCTime), + resultSerializationTime = metricQuantiles(_.resultSerializationTime), + memoryBytesSpilled = metricQuantiles(_.memoryBytesSpilled), + diskBytesSpilled = metricQuantiles(_.diskBytesSpilled), + inputMetrics = inputMetrics, + outputMetrics = outputMetrics, + shuffleReadMetrics = shuffleReadMetrics, + shuffleWriteMetrics = shuffleWriteMetrics + ) + } + + def convertAccumulableInfo(acc: InternalAccumulableInfo): AccumulableInfo = { + new AccumulableInfo(acc.id, acc.name, acc.update, acc.value) + } + + def convertUiTaskMetrics(internal: InternalTaskMetrics): TaskMetrics = { + new TaskMetrics( + executorDeserializeTime = internal.executorDeserializeTime, + executorRunTime = internal.executorRunTime, + resultSize = internal.resultSize, + jvmGcTime = internal.jvmGCTime, + resultSerializationTime = internal.resultSerializationTime, + memoryBytesSpilled = internal.memoryBytesSpilled, + diskBytesSpilled = internal.diskBytesSpilled, + inputMetrics = internal.inputMetrics.map { 
convertInputMetrics }, + outputMetrics = Option(internal.outputMetrics).flatten.map { convertOutputMetrics }, + shuffleReadMetrics = internal.shuffleReadMetrics.map { convertShuffleReadMetrics }, + shuffleWriteMetrics = internal.shuffleWriteMetrics.map { convertShuffleWriteMetrics } + ) + } + + def convertInputMetrics(internal: InternalInputMetrics): InputMetrics = { + new InputMetrics( + bytesRead = internal.bytesRead, + recordsRead = internal.recordsRead + ) + } + + def convertOutputMetrics(internal: InternalOutputMetrics): OutputMetrics = { + new OutputMetrics( + bytesWritten = internal.bytesWritten, + recordsWritten = internal.recordsWritten + ) + } + + def convertShuffleReadMetrics(internal: InternalShuffleReadMetrics): ShuffleReadMetrics = { + new ShuffleReadMetrics( + remoteBlocksFetched = internal.remoteBlocksFetched, + localBlocksFetched = internal.localBlocksFetched, + fetchWaitTime = internal.fetchWaitTime, + remoteBytesRead = internal.remoteBytesRead, + totalBlocksFetched = internal.totalBlocksFetched, + recordsRead = internal.recordsRead + ) + } + + def convertShuffleWriteMetrics(internal: InternalShuffleWriteMetrics): ShuffleWriteMetrics = { + new ShuffleWriteMetrics( + bytesWritten = internal.shuffleBytesWritten, + writeTime = internal.shuffleWriteTime, + recordsWritten = internal.shuffleRecordsWritten + ) + } +} + +/** + * Helper for getting distributions from nested metric types. Many of the metrics we want are + * contained in options inside TaskMetrics (eg., ShuffleWriteMetrics). This makes it easy to handle + * the options (returning None if the metrics are all empty), and extract the quantiles for each + * metric. After creating an instance, call metricOption to get the result type. + */ +private[v1] abstract class MetricHelper[I,O]( + rawMetrics: Seq[InternalTaskMetrics], + quantiles: Array[Double]) { + + def getSubmetrics(raw: InternalTaskMetrics): Option[I] + + def build: O + + val data: Seq[I] = rawMetrics.flatMap(getSubmetrics) + + /** applies the given function to all input metrics, and returns the quantiles */ + def submetricQuantiles(f: I => Double): IndexedSeq[Double] = { + Distribution(data.map { d => f(d) }).get.getQuantiles(quantiles) + } + + def metricOption: Option[O] = { + if (data.isEmpty) { + None + } else { + Some(build) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala new file mode 100644 index 0000000000000..17b521f3e1d41 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import java.util.{Arrays, Date, List => JList} +import javax.ws.rs.{DefaultValue, GET, Produces, QueryParam} +import javax.ws.rs.core.MediaType + +import org.apache.spark.deploy.history.ApplicationHistoryInfo +import org.apache.spark.deploy.master.{ApplicationInfo => InternalApplicationInfo} + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class ApplicationListResource(uiRoot: UIRoot) { + + @GET + def appList( + @QueryParam("status") status: JList[ApplicationStatus], + @DefaultValue("2010-01-01") @QueryParam("minDate") minDate: SimpleDateParam, + @DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam) + : Iterator[ApplicationInfo] = { + val allApps = uiRoot.getApplicationInfoList + val adjStatus = { + if (status.isEmpty) { + Arrays.asList(ApplicationStatus.values(): _*) + } else { + status + } + } + val includeCompleted = adjStatus.contains(ApplicationStatus.COMPLETED) + val includeRunning = adjStatus.contains(ApplicationStatus.RUNNING) + allApps.filter { app => + val anyRunning = app.attempts.exists(!_.completed) + // if any attempt is still running, we consider the app to also still be running + val statusOk = (!anyRunning && includeCompleted) || + (anyRunning && includeRunning) + // keep the app if *any* attempts fall in the right time window + val dateOk = app.attempts.exists { attempt => + attempt.startTime.getTime >= minDate.timestamp && + attempt.startTime.getTime <= maxDate.timestamp + } + statusOk && dateOk + } + } +} + +private[spark] object ApplicationsListResource { + def appHistoryInfoToPublicAppInfo(app: ApplicationHistoryInfo): ApplicationInfo = { + new ApplicationInfo( + id = app.id, + name = app.name, + attempts = app.attempts.map { internalAttemptInfo => + new ApplicationAttemptInfo( + attemptId = internalAttemptInfo.attemptId, + startTime = new Date(internalAttemptInfo.startTime), + endTime = new Date(internalAttemptInfo.endTime), + sparkUser = internalAttemptInfo.sparkUser, + completed = internalAttemptInfo.completed + ) + } + ) + } + + def convertApplicationInfo( + internal: InternalApplicationInfo, + completed: Boolean): ApplicationInfo = { + // standalone application info always has just one attempt + new ApplicationInfo( + id = internal.id, + name = internal.desc.name, + attempts = Seq(new ApplicationAttemptInfo( + attemptId = None, + startTime = new Date(internal.startTime), + endTime = new Date(internal.endTime), + sparkUser = internal.desc.user, + completed = completed + )) + ) + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala new file mode 100644 index 0000000000000..8ad4656b4dada --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala @@ -0,0 +1,36 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{GET, PathParam, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.exec.ExecutorsPage + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class ExecutorListResource(ui: SparkUI) { + + @GET + def executorList(): Seq[ExecutorSummary] = { + val listener = ui.executorsListener + val storageStatusList = listener.storageStatusList + (0 until storageStatusList.size).map { statusId => + ExecutorsPage.getExecInfo(listener, statusId) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala new file mode 100644 index 0000000000000..202a5191ad57d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.io.OutputStream +import java.lang.annotation.Annotation +import java.lang.reflect.Type +import java.text.SimpleDateFormat +import java.util.{Calendar, SimpleTimeZone} +import javax.ws.rs.Produces +import javax.ws.rs.core.{MediaType, MultivaluedMap} +import javax.ws.rs.ext.{MessageBodyWriter, Provider} + +import com.fasterxml.jackson.annotation.JsonInclude +import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} + +/** + * This class converts the POJO metric responses into json, using jackson. + * + * This doesn't follow the standard jersey-jackson plugin options, because we want to stick + * with an old version of jersey (since we have it from yarn anyway) and don't want to pull in lots + * of dependencies from a new plugin. + * + * Note that jersey automatically discovers this class based on its package and its annotations. 
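+ *
+ * As a rough illustration (values borrowed from the golden files added in this patch), the
+ * mapper configured below pretty-prints, drops null fields, and pins dates to GMT, so a
+ * serialized attempt looks like:
+ *
+ * {{{
+ *   { "startTime" : "2015-05-06T13:03:00.893GMT", "sparkUser" : "irashid", "completed" : true }
+ * }}}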
+ */ +@Provider +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ + + val mapper = new ObjectMapper() { + override def writeValueAsString(t: Any): String = { + super.writeValueAsString(t) + } + } + mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) + mapper.enable(SerializationFeature.INDENT_OUTPUT) + mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) + mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) + + override def isWriteable( + aClass: Class[_], + `type`: Type, + annotations: Array[Annotation], + mediaType: MediaType): Boolean = { + true + } + + override def writeTo( + t: Object, + aClass: Class[_], + `type`: Type, + annotations: Array[Annotation], + mediaType: MediaType, + multivaluedMap: MultivaluedMap[String, AnyRef], + outputStream: OutputStream): Unit = { + t match { + case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8")) + case _ => mapper.writeValue(outputStream, t) + } + } + + override def getSize( + t: Object, + aClass: Class[_], + `type`: Type, + annotations: Array[Annotation], + mediaType: MediaType): Long = { + -1L + } +} + +private[spark] object JacksonMessageWriter { + def makeISODateFormat: SimpleDateFormat = { + val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") + val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) + iso8601.setCalendar(cal) + iso8601 + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala new file mode 100644 index 0000000000000..c3ec45f54681b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.servlet.ServletContext +import javax.ws.rs._ +import javax.ws.rs.core.{Context, Response} + +import com.sun.jersey.api.core.ResourceConfig +import com.sun.jersey.spi.container.servlet.ServletContainer +import org.eclipse.jetty.server.handler.ContextHandler +import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} + +import org.apache.spark.SecurityManager +import org.apache.spark.ui.SparkUI + +/** + * Main entry point for serving spark application metrics as json, using JAX-RS. + * + * Each resource should have endpoints that return **public** classes defined in api.scala. Mima + * binary compatibility checks ensure that we don't inadvertently make changes that break the api. + * The returned objects are automatically converted to json by jackson with JacksonMessageWriter. 
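+ * For example (paths follow the @Path annotations declared below; shown here only as an
+ * illustration), the servlet registered at /json serves urls such as:
+ *
+ * {{{
+ *   /json/v1/applications
+ *   /json/v1/applications/{appId}/jobs
+ *   /json/v1/applications/{appId}/{attemptId}/stages
+ * }}}
+ *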
+ * In addition, there are a number of tests in HistoryServerSuite that compare the json to "golden + * files". Any changes and additions should be reflected there as well -- see the notes in + * HistoryServerSuite. + */ +@Path("/v1") +private[v1] class JsonRootResource extends UIRootFromServletContext { + + @Path("applications") + def getApplicationList(): ApplicationListResource = { + new ApplicationListResource(uiRoot) + } + + @Path("applications/{appId}") + def getApplication(): OneApplicationResource = { + new OneApplicationResource(uiRoot) + } + + @Path("applications/{appId}/{attemptId}/jobs") + def getJobs( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): AllJobsResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new AllJobsResource(ui) + } + } + + @Path("applications/{appId}/jobs") + def getJobs(@PathParam("appId") appId: String): AllJobsResource = { + uiRoot.withSparkUI(appId, None) { ui => + new AllJobsResource(ui) + } + } + + @Path("applications/{appId}/jobs/{jobId: \\d+}") + def getJob(@PathParam("appId") appId: String): OneJobResource = { + uiRoot.withSparkUI(appId, None) { ui => + new OneJobResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/jobs/{jobId: \\d+}") + def getJob( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): OneJobResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new OneJobResource(ui) + } + } + + @Path("applications/{appId}/executors") + def getExecutors(@PathParam("appId") appId: String): ExecutorListResource = { + uiRoot.withSparkUI(appId, None) { ui => + new ExecutorListResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/executors") + def getExecutors( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): ExecutorListResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new ExecutorListResource(ui) + } + } + + + @Path("applications/{appId}/stages") + def getStages(@PathParam("appId") appId: String): AllStagesResource= { + uiRoot.withSparkUI(appId, None) { ui => + new AllStagesResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/stages") + def getStages( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): AllStagesResource= { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new AllStagesResource(ui) + } + } + + @Path("applications/{appId}/stages/{stageId: \\d+}") + def getStage(@PathParam("appId") appId: String): OneStageResource= { + uiRoot.withSparkUI(appId, None) { ui => + new OneStageResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/stages/{stageId: \\d+}") + def getStage( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): OneStageResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new OneStageResource(ui) + } + } + + @Path("applications/{appId}/storage/rdd") + def getRdds(@PathParam("appId") appId: String): AllRDDResource = { + uiRoot.withSparkUI(appId, None) { ui => + new AllRDDResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/storage/rdd") + def getRdds( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): AllRDDResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new AllRDDResource(ui) + } + } + + @Path("applications/{appId}/storage/rdd/{rddId: \\d+}") + def getRdd(@PathParam("appId") appId: String): OneRDDResource = { + uiRoot.withSparkUI(appId, None) { ui => + new OneRDDResource(ui) + } + } + + 
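+  // Each resource is registered twice: without an attemptId (above) for single-attempt
+  // applications, and with one (below) for applications that have multiple attempts, e.g.
+  // /json/v1/applications/local-1426533911241/2/storage/rdd/0 (illustrative url only).
+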
@Path("applications/{appId}/{attemptId}/storage/rdd/{rddId: \\d+}") + def getRdd( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): OneRDDResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new OneRDDResource(ui) + } + } + +} + +private[spark] object JsonRootResource { + + def getJsonServlet(uiRoot: UIRoot): ServletContextHandler = { + val jerseyContext = new ServletContextHandler(ServletContextHandler.NO_SESSIONS) + jerseyContext.setContextPath("/json") + val holder:ServletHolder = new ServletHolder(classOf[ServletContainer]) + holder.setInitParameter("com.sun.jersey.config.property.resourceConfigClass", + "com.sun.jersey.api.core.PackagesResourceConfig") + holder.setInitParameter("com.sun.jersey.config.property.packages", + "org.apache.spark.status.api.v1") + holder.setInitParameter(ResourceConfig.PROPERTY_CONTAINER_REQUEST_FILTERS, + classOf[SecurityFilter].getCanonicalName) + UIRootFromServletContext.setUiRoot(jerseyContext, uiRoot) + jerseyContext.addServlet(holder, "/*") + jerseyContext + } +} + +/** + * This trait is shared by the all the root containers for application UI information -- + * the HistoryServer, the Master UI, and the application UI. This provides the common + * interface needed for them all to expose application info as json. + */ +private[spark] trait UIRoot { + def getSparkUI(appKey: String): Option[SparkUI] + def getApplicationInfoList: Iterator[ApplicationInfo] + + /** + * Get the spark UI with the given appID, and apply a function + * to it. If there is no such app, throw an appropriate exception + */ + def withSparkUI[T](appId: String, attemptId: Option[String])(f: SparkUI => T): T = { + val appKey = attemptId.map(appId + "/" + _).getOrElse(appId) + getSparkUI(appKey) match { + case Some(ui) => + f(ui) + case None => throw new NotFoundException("no such app: " + appId) + } + } + def securityManager: SecurityManager +} + +private[v1] object UIRootFromServletContext { + + private val attribute = getClass.getCanonicalName + + def setUiRoot(contextHandler: ContextHandler, uiRoot: UIRoot): Unit = { + contextHandler.setAttribute(attribute, uiRoot) + } + + def getUiRoot(context: ServletContext): UIRoot = { + context.getAttribute(attribute).asInstanceOf[UIRoot] + } +} + +private[v1] trait UIRootFromServletContext { + @Context + var servletContext: ServletContext = _ + + def uiRoot: UIRoot = UIRootFromServletContext.getUiRoot(servletContext) +} + +private[v1] class NotFoundException(msg: String) extends WebApplicationException( + new NoSuchElementException(msg), + Response + .status(Response.Status.NOT_FOUND) + .entity(ErrorWrapper(msg)) + .build() +) + +private[v1] class BadParameterException(msg: String) extends WebApplicationException( + new IllegalArgumentException(msg), + Response + .status(Response.Status.BAD_REQUEST) + .entity(ErrorWrapper(msg)) + .build() +) { + def this(param: String, exp: String, actual: String) = { + this(raw"""Bad value for parameter "$param". Expected a $exp, got "$actual"""") + } +} + +/** + * Signal to JacksonMessageWriter to not convert the message into json (which would result in an + * extra set of quotes). 
+ */ +private[v1] case class ErrorWrapper(s: String) diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala new file mode 100644 index 0000000000000..b5ef72649e295 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.core.MediaType +import javax.ws.rs.{Produces, PathParam, GET} + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneApplicationResource(uiRoot: UIRoot) { + + @GET + def getApp(@PathParam("appId") appId: String): ApplicationInfo = { + val apps = uiRoot.getApplicationInfoList.find { _.id == appId } + apps.getOrElse(throw new NotFoundException("unknown app: " + appId)) + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala new file mode 100644 index 0000000000000..6d8a60d480aed --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{PathParam, GET, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.JobExecutionStatus +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.UIData.JobUIData + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneJobResource(ui: SparkUI) { + + @GET + def oneJob(@PathParam("jobId") jobId: Int): JobData = { + val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] = + AllJobsResource.getStatusToJobs(ui) + val jobOpt = statusToJobs.map {_._2} .flatten.find { jobInfo => jobInfo.jobId == jobId} + jobOpt.map { job => + AllJobsResource.convertJobData(job, ui.jobProgressListener, false) + }.getOrElse { + throw new NotFoundException("unknown job: " + jobId) + } + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala new file mode 100644 index 0000000000000..07b224fac4786 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{PathParam, GET, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.ui.SparkUI + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneRDDResource(ui: SparkUI) { + + @GET + def rddData(@PathParam("rddId") rddId: Int): RDDStorageInfo = { + AllRDDResource.getRDDStorageInfo(rddId, ui.storageListener, true).getOrElse( + throw new NotFoundException(s"no rdd found w/ id $rddId") + ) + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala new file mode 100644 index 0000000000000..fd24aea63a8a1 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.apache.spark.SparkException +import org.apache.spark.scheduler.StageInfo +import org.apache.spark.status.api.v1.StageStatus._ +import org.apache.spark.status.api.v1.TaskSorting._ +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.JobProgressListener +import org.apache.spark.ui.jobs.UIData.StageUIData + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneStageResource(ui: SparkUI) { + + @GET + @Path("") + def stageData(@PathParam("stageId") stageId: Int): Seq[StageData] = { + withStage(stageId){ stageAttempts => + stageAttempts.map { stage => + AllStagesResource.stageUiToStageData(stage.status, stage.info, stage.ui, + includeDetails = true) + } + } + } + + @GET + @Path("/{stageAttemptId: \\d+}") + def oneAttemptData( + @PathParam("stageId") stageId: Int, + @PathParam("stageAttemptId") stageAttemptId: Int): StageData = { + withStageAttempt(stageId, stageAttemptId) { stage => + AllStagesResource.stageUiToStageData(stage.status, stage.info, stage.ui, + includeDetails = true) + } + } + + @GET + @Path("/{stageAttemptId: \\d+}/taskSummary") + def taskSummary( + @PathParam("stageId") stageId: Int, + @PathParam("stageAttemptId") stageAttemptId: Int, + @DefaultValue("0.05,0.25,0.5,0.75,0.95") @QueryParam("quantiles") quantileString: String) + : TaskMetricDistributions = { + withStageAttempt(stageId, stageAttemptId) { stage => + val quantiles = quantileString.split(",").map { s => + try { + s.toDouble + } catch { + case nfe: NumberFormatException => + throw new BadParameterException("quantiles", "double", s) + } + } + AllStagesResource.taskMetricDistributions(stage.ui.taskData.values, quantiles) + } + } + + @GET + @Path("/{stageAttemptId: \\d+}/taskList") + def taskList( + @PathParam("stageId") stageId: Int, + @PathParam("stageAttemptId") stageAttemptId: Int, + @DefaultValue("0") @QueryParam("offset") offset: Int, + @DefaultValue("20") @QueryParam("length") length: Int, + @DefaultValue("ID") @QueryParam("sortBy") sortBy: TaskSorting): Seq[TaskData] = { + withStageAttempt(stageId, stageAttemptId) { stage => + val tasks = stage.ui.taskData.values.map{AllStagesResource.convertTaskData}.toIndexedSeq + .sorted(OneStageResource.ordering(sortBy)) + tasks.slice(offset, offset + length) + } + } + + private case class StageStatusInfoUi(status: StageStatus, info: StageInfo, ui: StageUIData) + + private def withStage[T](stageId: Int)(f: Seq[StageStatusInfoUi] => T): T = { + val stageAttempts = findStageStatusUIData(ui.jobProgressListener, stageId) + if (stageAttempts.isEmpty) { + throw new NotFoundException("unknown stage: " + stageId) + } else { + f(stageAttempts) + } + } + + private def findStageStatusUIData( + listener: JobProgressListener, + stageId: Int): Seq[StageStatusInfoUi] = { + listener.synchronized { + def getStatusInfoUi(status: StageStatus, infos: Seq[StageInfo]): Seq[StageStatusInfoUi] = { + infos.filter { _.stageId == stageId }.map { info => + val ui = listener.stageIdToData.getOrElse((info.stageId, info.attemptId), + // this is an internal error -- we should always have uiData + throw new SparkException( + s"no stage ui data found for stage: ${info.stageId}:${info.attemptId}") + ) + StageStatusInfoUi(status, info, ui) + } + } + getStatusInfoUi(ACTIVE, listener.activeStages.values.toSeq) ++ + getStatusInfoUi(COMPLETE, listener.completedStages) ++ + getStatusInfoUi(FAILED, listener.failedStages) ++ + getStatusInfoUi(PENDING, 
listener.pendingStages.values.toSeq) + } + } + + private def withStageAttempt[T]( + stageId: Int, + stageAttemptId: Int) + (f: StageStatusInfoUi => T): T = { + withStage(stageId) { attempts => + val oneAttempt = attempts.find { stage => stage.info.attemptId == stageAttemptId } + oneAttempt match { + case Some(stage) => + f(stage) + case None => + val stageAttempts = attempts.map { _.info.attemptId } + throw new NotFoundException(s"unknown attempt for stage $stageId. " + + s"Found attempts: ${stageAttempts.mkString("[", ",", "]")}") + } + } + } +} + +object OneStageResource { + def ordering(taskSorting: TaskSorting): Ordering[TaskData] = { + val extractor: (TaskData => Long) = td => + taskSorting match { + case ID => td.taskId + case INCREASING_RUNTIME => td.taskMetrics.map{_.executorRunTime}.getOrElse(-1L) + case DECREASING_RUNTIME => -td.taskMetrics.map{_.executorRunTime}.getOrElse(-1L) + } + Ordering.by(extractor) + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala b/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala new file mode 100644 index 0000000000000..95fbd96ade5ab --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.WebApplicationException +import javax.ws.rs.core.Response + +import com.sun.jersey.spi.container.{ContainerRequest, ContainerRequestFilter} + +private[v1] class SecurityFilter extends ContainerRequestFilter with UIRootFromServletContext { + def filter(req: ContainerRequest): ContainerRequest = { + val user = Option(req.getUserPrincipal).map { _.getName }.orNull + if (uiRoot.securityManager.checkUIViewPermissions(user)) { + req + } else { + throw new WebApplicationException( + Response + .status(Response.Status.FORBIDDEN) + .entity(raw"""user "$user"is not authorized""") + .build() + ) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala b/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala new file mode 100644 index 0000000000000..cee29786c3019 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.text.SimpleDateFormat +import java.util.TimeZone +import javax.ws.rs.WebApplicationException +import javax.ws.rs.core.Response +import javax.ws.rs.core.Response.Status + +import scala.util.Try + +private[v1] class SimpleDateParam(val originalValue: String) { + val timestamp: Long = { + SimpleDateParam.formats.collectFirst { + case fmt if Try(fmt.parse(originalValue)).isSuccess => + fmt.parse(originalValue).getTime() + }.getOrElse( + throw new WebApplicationException( + Response + .status(Status.BAD_REQUEST) + .entity("Couldn't parse date: " + originalValue) + .build() + ) + ) + } +} + +private[v1] object SimpleDateParam { + + val formats: Seq[SimpleDateFormat] = { + + val gmtDay = new SimpleDateFormat("yyyy-MM-dd") + gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) + + Seq( + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz"), + gmtDay + ) + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala new file mode 100644 index 0000000000000..ef3c8570d8186 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import java.util.Date + +import scala.collection.Map + +import org.apache.spark.JobExecutionStatus + +class ApplicationInfo private[spark]( + val id: String, + val name: String, + val attempts: Seq[ApplicationAttemptInfo]) + +class ApplicationAttemptInfo private[spark]( + val attemptId: Option[String], + val startTime: Date, + val endTime: Date, + val sparkUser: String, + val completed: Boolean = false) + +class ExecutorStageSummary private[spark]( + val taskTime : Long, + val failedTasks : Int, + val succeededTasks : Int, + val inputBytes : Long, + val outputBytes : Long, + val shuffleRead : Long, + val shuffleWrite : Long, + val memoryBytesSpilled : Long, + val diskBytesSpilled : Long) + +class ExecutorSummary private[spark]( + val id: String, + val hostPort: String, + val rddBlocks: Int, + val memoryUsed: Long, + val diskUsed: Long, + val activeTasks: Int, + val failedTasks: Int, + val completedTasks: Int, + val totalTasks: Int, + val totalDuration: Long, + val totalInputBytes: Long, + val totalShuffleRead: Long, + val totalShuffleWrite: Long, + val maxMemory: Long, + val executorLogs: Map[String, String]) + +class JobData private[spark]( + val jobId: Int, + val name: String, + val description: Option[String], + val submissionTime: Option[Date], + val completionTime: Option[Date], + val stageIds: Seq[Int], + val jobGroup: Option[String], + val status: JobExecutionStatus, + val numTasks: Int, + val numActiveTasks: Int, + val numCompletedTasks: Int, + val numSkippedTasks: Int, + val numFailedTasks: Int, + val numActiveStages: Int, + val numCompletedStages: Int, + val numSkippedStages: Int, + val numFailedStages: Int) + +// Q: should Tachyon size go in here as well? currently the UI only shows it on the overall storage +// page ... does anybody pay attention to it? 
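+
+// These are deliberately simple classes exposing public vals: jackson serializes them
+// directly (via JacksonMessageWriter), and keeping them public lets the Mima binary
+// compatibility checks catch incompatible changes to the api.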
+class RDDStorageInfo private[spark]( + val id: Int, + val name: String, + val numPartitions: Int, + val numCachedPartitions: Int, + val storageLevel: String, + val memoryUsed: Long, + val diskUsed: Long, + val dataDistribution: Option[Seq[RDDDataDistribution]], + val partitions: Option[Seq[RDDPartitionInfo]]) + +class RDDDataDistribution private[spark]( + val address: String, + val memoryUsed: Long, + val memoryRemaining: Long, + val diskUsed: Long) + +class RDDPartitionInfo private[spark]( + val blockName: String, + val storageLevel: String, + val memoryUsed: Long, + val diskUsed: Long, + val executors: Seq[String]) + +class StageData private[spark]( + val status: StageStatus, + val stageId: Int, + val attemptId: Int, + val numActiveTasks: Int , + val numCompleteTasks: Int, + val numFailedTasks: Int, + + val executorRunTime: Long, + + val inputBytes: Long, + val inputRecords: Long, + val outputBytes: Long, + val outputRecords: Long, + val shuffleReadBytes: Long, + val shuffleReadRecords: Long, + val shuffleWriteBytes: Long, + val shuffleWriteRecords: Long, + val memoryBytesSpilled: Long, + val diskBytesSpilled: Long, + + val name: String, + val details: String, + val schedulingPool: String, + + val accumulatorUpdates: Seq[AccumulableInfo], + val tasks: Option[Map[Long, TaskData]], + val executorSummary:Option[Map[String,ExecutorStageSummary]]) + +class TaskData private[spark]( + val taskId: Long, + val index: Int, + val attempt: Int, + val launchTime: Date, + val executorId: String, + val host: String, + val taskLocality: String, + val speculative: Boolean, + val accumulatorUpdates: Seq[AccumulableInfo], + val errorMessage: Option[String] = None, + val taskMetrics: Option[TaskMetrics] = None) + +class TaskMetrics private[spark]( + val executorDeserializeTime: Long, + val executorRunTime: Long, + val resultSize: Long, + val jvmGcTime: Long, + val resultSerializationTime: Long, + val memoryBytesSpilled: Long, + val diskBytesSpilled: Long, + val inputMetrics: Option[InputMetrics], + val outputMetrics: Option[OutputMetrics], + val shuffleReadMetrics: Option[ShuffleReadMetrics], + val shuffleWriteMetrics: Option[ShuffleWriteMetrics]) + +class InputMetrics private[spark]( + val bytesRead: Long, + val recordsRead: Long) + +class OutputMetrics private[spark]( + val bytesWritten: Long, + val recordsWritten: Long) + +class ShuffleReadMetrics private[spark]( + val remoteBlocksFetched: Int, + val localBlocksFetched: Int, + val fetchWaitTime: Long, + val remoteBytesRead: Long, + val totalBlocksFetched: Int, + val recordsRead: Long) + +class ShuffleWriteMetrics private[spark]( + val bytesWritten: Long, + val writeTime: Long, + val recordsWritten: Long) + +class TaskMetricDistributions private[spark]( + val quantiles: IndexedSeq[Double], + + val executorDeserializeTime: IndexedSeq[Double], + val executorRunTime: IndexedSeq[Double], + val resultSize: IndexedSeq[Double], + val jvmGcTime: IndexedSeq[Double], + val resultSerializationTime: IndexedSeq[Double], + val memoryBytesSpilled: IndexedSeq[Double], + val diskBytesSpilled: IndexedSeq[Double], + + val inputMetrics: Option[InputMetricDistributions], + val outputMetrics: Option[OutputMetricDistributions], + val shuffleReadMetrics: Option[ShuffleReadMetricDistributions], + val shuffleWriteMetrics: Option[ShuffleWriteMetricDistributions]) + +class InputMetricDistributions private[spark]( + val bytesRead: IndexedSeq[Double], + val recordsRead: IndexedSeq[Double]) + +class OutputMetricDistributions private[spark]( + val bytesWritten: IndexedSeq[Double], + 
val recordsWritten: IndexedSeq[Double]) + +class ShuffleReadMetricDistributions private[spark]( + val readBytes: IndexedSeq[Double], + val readRecords: IndexedSeq[Double], + val remoteBlocksFetched: IndexedSeq[Double], + val localBlocksFetched: IndexedSeq[Double], + val fetchWaitTime: IndexedSeq[Double], + val remoteBytesRead: IndexedSeq[Double], + val totalBlocksFetched: IndexedSeq[Double]) + +class ShuffleWriteMetricDistributions private[spark]( + val writeBytes: IndexedSeq[Double], + val writeRecords: IndexedSeq[Double], + val writeTime: IndexedSeq[Double]) + +class AccumulableInfo private[spark]( + val id: Long, + val name: String, + val update: Option[String], + val value: String) diff --git a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala index 7d75929b96f75..ec711480ebf30 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala @@ -25,13 +25,17 @@ import org.apache.spark.scheduler._ /** * :: DeveloperApi :: * A SparkListener that maintains executor storage status. + * + * This class is thread-safe (unlike JobProgressListener) */ @DeveloperApi class StorageStatusListener extends SparkListener { // This maintains only blocks that are cached (i.e. storage level is not StorageLevel.NONE) private[storage] val executorIdToStorageStatus = mutable.Map[String, StorageStatus]() - def storageStatusList: Seq[StorageStatus] = executorIdToStorageStatus.values.toSeq + def storageStatusList: Seq[StorageStatus] = synchronized { + executorIdToStorageStatus.values.toSeq + } /** Update storage status list to reflect updated block statuses */ private def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) { diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index a5271f0574e6c..bfe4a180e8a6f 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -17,6 +17,9 @@ package org.apache.spark.ui +import java.util.Date + +import org.apache.spark.status.api.v1.{ApplicationAttemptInfo, ApplicationInfo, JsonRootResource, UIRoot} import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext} import org.apache.spark.scheduler._ import org.apache.spark.storage.StorageStatusListener @@ -33,7 +36,7 @@ import org.apache.spark.ui.scope.RDDOperationGraphListener private[spark] class SparkUI private ( val sc: Option[SparkContext], val conf: SparkConf, - val securityManager: SecurityManager, + securityManager: SecurityManager, val environmentListener: EnvironmentListener, val storageStatusListener: StorageStatusListener, val executorsListener: ExecutorsListener, @@ -41,22 +44,27 @@ private[spark] class SparkUI private ( val storageListener: StorageListener, val operationGraphListener: RDDOperationGraphListener, var appName: String, - val basePath: String) + val basePath: String, + val startTime: Long) extends WebUI(securityManager, SparkUI.getUIPort(conf), conf, basePath, "SparkUI") - with Logging { + with Logging + with UIRoot { val killEnabled = sc.map(_.conf.getBoolean("spark.ui.killEnabled", true)).getOrElse(false) + + val stagesTab = new StagesTab(this) + /** Initialize all components of the server. 
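+   * Along with the usual html tabs, this now also attaches the json api servlet (see
+   * JsonRootResource), so the same listener data backs both the web pages and the api.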
*/ def initialize() { attachTab(new JobsTab(this)) - val stagesTab = new StagesTab(this) attachTab(stagesTab) attachTab(new StorageTab(this)) attachTab(new EnvironmentTab(this)) attachTab(new ExecutorsTab(this)) attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static")) attachHandler(createRedirectHandler("/", "/jobs", basePath = basePath)) + attachHandler(JsonRootResource.getJsonServlet(this)) // This should be POST only, but, the YARN AM proxy won't proxy POSTs attachHandler(createRedirectHandler( "/stages/stage/kill", "/stages", stagesTab.handleKillRequest, @@ -83,6 +91,24 @@ private[spark] class SparkUI private ( private[spark] def appUIHostPort = publicHostName + ":" + boundPort private[spark] def appUIAddress = s"http://$appUIHostPort" + + def getSparkUI(appId: String): Option[SparkUI] = { + if (appId == appName) Some(this) else None + } + + def getApplicationInfoList: Iterator[ApplicationInfo] = { + Iterator(new ApplicationInfo( + id = appName, + name = appName, + attempts = Seq(new ApplicationAttemptInfo( + attemptId = None, + startTime = new Date(startTime), + endTime = new Date(-1), + sparkUser = "", + completed = false + )) + )) + } } private[spark] abstract class SparkUITab(parent: SparkUI, prefix: String) @@ -109,9 +135,10 @@ private[spark] object SparkUI { listenerBus: SparkListenerBus, jobProgressListener: JobProgressListener, securityManager: SecurityManager, - appName: String): SparkUI = { + appName: String, + startTime: Long): SparkUI = { create(Some(sc), conf, listenerBus, securityManager, appName, - jobProgressListener = Some(jobProgressListener)) + jobProgressListener = Some(jobProgressListener), startTime = startTime) } def createHistoryUI( @@ -119,8 +146,9 @@ private[spark] object SparkUI { listenerBus: SparkListenerBus, securityManager: SecurityManager, appName: String, - basePath: String): SparkUI = { - create(None, conf, listenerBus, securityManager, appName, basePath) + basePath: String, + startTime: Long): SparkUI = { + create(None, conf, listenerBus, securityManager, appName, basePath, startTime = startTime) } /** @@ -137,7 +165,8 @@ private[spark] object SparkUI { securityManager: SecurityManager, appName: String, basePath: String = "", - jobProgressListener: Option[JobProgressListener] = None): SparkUI = { + jobProgressListener: Option[JobProgressListener] = None, + startTime: Long): SparkUI = { val _jobProgressListener: JobProgressListener = jobProgressListener.getOrElse { val listener = new JobProgressListener(conf) @@ -159,6 +188,6 @@ private[spark] object SparkUI { new SparkUI(sc, conf, securityManager, environmentListener, storageStatusListener, executorsListener, _jobProgressListener, storageListener, operationGraphListener, - appName, basePath) + appName, basePath, startTime) } } diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala b/core/src/main/scala/org/apache/spark/ui/WebUI.scala index f9860d1a5ce76..384f2ad26e281 100644 --- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/WebUI.scala @@ -37,7 +37,7 @@ import org.apache.spark.{Logging, SecurityManager, SparkConf} * pages. The use of tabs is optional, however; a WebUI may choose to include pages directly. 
*/ private[spark] abstract class WebUI( - securityManager: SecurityManager, + val securityManager: SecurityManager, port: Int, conf: SparkConf, basePath: String = "", @@ -77,15 +77,9 @@ private[spark] abstract class WebUI( val pagePath = "/" + page.prefix val renderHandler = createServletHandler(pagePath, (request: HttpServletRequest) => page.render(request), securityManager, basePath) - val renderJsonHandler = createServletHandler(pagePath.stripSuffix("/") + "/json", - (request: HttpServletRequest) => page.renderJson(request), securityManager, basePath) attachHandler(renderHandler) - attachHandler(renderJsonHandler) pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]()) .append(renderHandler) - pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]()) - .append(renderJsonHandler) - } /** Attach a handler to this UI. */ diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala index 956608d7c0cbe..b247e4cdc3bd4 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala @@ -22,11 +22,11 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node +import org.apache.spark.status.api.v1.ExecutorSummary import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} import org.apache.spark.util.Utils -/** Summary information about an executor to display in the UI. */ -// Needs to be private[ui] because of a false positive MiMa failure. +// This isn't even used anymore -- but we need to keep it b/c of a MiMa false positive private[ui] case class ExecutorSummaryInfo( id: String, hostPort: String, @@ -44,6 +44,7 @@ private[ui] case class ExecutorSummaryInfo( maxMemory: Long, executorLogs: Map[String, String]) + private[ui] class ExecutorsPage( parent: ExecutorsTab, threadDumpEnabled: Boolean) @@ -55,7 +56,8 @@ private[ui] class ExecutorsPage( val maxMem = storageStatusList.map(_.maxMem).sum val memUsed = storageStatusList.map(_.memUsed).sum val diskUsed = storageStatusList.map(_.diskUsed).sum - val execInfo = for (statusId <- 0 until storageStatusList.size) yield getExecInfo(statusId) + val execInfo = for (statusId <- 0 until storageStatusList.size) yield + ExecutorsPage.getExecInfo(listener, statusId) val execInfoSorted = execInfo.sortBy(_.id) val logsExist = execInfo.filter(_.executorLogs.nonEmpty).nonEmpty @@ -111,7 +113,7 @@ private[ui] class ExecutorsPage( } /** Render an HTML row representing an executor */ - private def execRow(info: ExecutorSummaryInfo, logsExist: Boolean): Seq[Node] = { + private def execRow(info: ExecutorSummary, logsExist: Boolean): Seq[Node] = { val maximumMemory = info.maxMemory val memoryUsed = info.memoryUsed val diskUsed = info.diskUsed @@ -170,8 +172,11 @@ private[ui] class ExecutorsPage( } +} + +private[spark] object ExecutorsPage { /** Represent an executor's info as a map given a storage status index */ - private def getExecInfo(statusId: Int): ExecutorSummaryInfo = { + def getExecInfo(listener: ExecutorsListener, statusId: Int): ExecutorSummary = { val status = listener.storageStatusList(statusId) val execId = status.blockManagerId.executorId val hostPort = status.blockManagerId.hostPort @@ -189,7 +194,7 @@ private[ui] class ExecutorsPage( val totalShuffleWrite = listener.executorToShuffleWrite.getOrElse(execId, 0L) val executorLogs = listener.executorToLogUrls.getOrElse(execId, Map.empty) - new ExecutorSummaryInfo( + new ExecutorSummary( execId, 
hostPort, rddBlocks, diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index f6abf27db49dd..09323d1d80ad6 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -271,6 +271,12 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { val shouldShowCompletedJobs = completedJobs.nonEmpty val shouldShowFailedJobs = failedJobs.nonEmpty + val completedJobNumStr = if (completedJobs.size == listener.numCompletedJobs) { + s"${completedJobs.size}" + } else { + s"${listener.numCompletedJobs}, only showing ${completedJobs.size}" + } + val summary: NodeSeq =
      <div>
        <ul class="unstyled">
@@ -295,9 +301,9 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") {
          }
          {
            if (shouldShowCompletedJobs) {
-              <li>
+              <li id="completed-summary">
                <a href="#completed"><strong>Completed Jobs:</strong></a>
-                {completedJobs.size}
+                {completedJobNumStr}
              </li>
            }
          }
@@ -305,7 +311,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") {
            if (shouldShowFailedJobs) {
              <li>
                <a href="#failed"><strong>Failed Jobs:</strong></a>
-                {failedJobs.size}
+                {listener.numFailedJobs}
              </li>
            }
          }
@@ -322,7 +328,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") {
      activeJobsTable
    }
    if (shouldShowCompletedJobs) {
-      content ++= <h4 id="completed">Completed Jobs ({completedJobs.size})</h4> ++
+      content ++= <h4 id="completed">Completed Jobs ({completedJobNumStr})</h4> ++
      completedJobsTable
    }
    if (shouldShowFailedJobs) {
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala
index 236bc8ea92879..a37f739ab9c66 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala
@@ -64,6 +64,12 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") {
    val shouldShowCompletedStages = completedStages.nonEmpty
    val shouldShowFailedStages = failedStages.nonEmpty

+    val completedStageNumStr = if (numCompletedStages == completedStages.size) {
+      s"$numCompletedStages"
+    } else {
+      s"$numCompletedStages, only showing ${completedStages.size}"
+    }
+
    val summary: NodeSeq =
      <div>
        <ul class="unstyled">
@@ -98,9 +104,9 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") {
          }
          {
            if (shouldShowCompletedStages) {
-              <li>
+              <li id="completed-summary">
                <a href="#completed"><strong>Completed Stages:</strong></a>
-                {numCompletedStages}
+                {completedStageNumStr}
              </li>
            }
          }
@@ -132,7 +138,7 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") {
      pendingStagesTable.toNodeSeq
    }
    if (shouldShowCompletedStages) {
-      content ++= <h4 id="completed">Completed Stages ({numCompletedStages})</h4> ++
+      content ++= <h4 id="completed">Completed Stages ({completedStageNumStr})</h4> ++
      completedStagesTable.toNodeSeq
    }
    if (shouldShowFailedStages) {
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
index 96cc3d78d0f15..7163217e1fed0 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
@@ -187,7 +187,7 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") {
    val jobDataOption = listener.jobIdToData.get(jobId)
    if (jobDataOption.isEmpty) {
      val content =
-        <div>
+        <div id="no-info">
          <p>No information to display for job {jobId}</p>
        </div>
      return UIUtils.headerSparkPage(
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
index 8f9aa9fdec819..246e191d64776 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
@@ -74,6 +74,8 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
  // JobProgressListener's retention limits.
  var numCompletedStages = 0
  var numFailedStages = 0
+  var numCompletedJobs = 0
+  var numFailedJobs = 0

  // Misc:
  val executorIdToBlockManagerId = HashMap[ExecutorId, BlockManagerId]()
@@ -217,10 +219,12 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
        completedJobs += jobData
        trimJobsIfNecessary(completedJobs)
        jobData.status = JobExecutionStatus.SUCCEEDED
+        numCompletedJobs += 1
      case JobFailed(exception) =>
        failedJobs += jobData
        trimJobsIfNecessary(failedJobs)
        jobData.status = JobExecutionStatus.FAILED
+        numFailedJobs += 1
    }
    for (stageId <- jobData.stageIds) {
      stageIdToActiveJobIds.get(stageId).foreach { jobsUsingStage =>
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
index d725b9d8565ac..f3e0b38523f32 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
@@ -21,7 +21,7 @@ import javax.servlet.http.HttpServletRequest

 import scala.xml.Node

-import org.apache.spark.scheduler.{Schedulable, StageInfo}
+import org.apache.spark.scheduler.StageInfo
 import org.apache.spark.ui.{WebUIPage, UIUtils}

 /** Page showing specific pool details */
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index 6c4305873cbd9..b01fad8e453c8 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -25,11 +25,11 @@ import scala.xml.{Elem, Node, Unparsed}
 import org.apache.commons.lang3.StringEscapeUtils

 import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
 import org.apache.spark.ui.{ToolTips, WebUIPage, UIUtils}
 import org.apache.spark.ui.jobs.UIData._
 import org.apache.spark.ui.scope.RDDOperationGraph
 import org.apache.spark.util.{Utils, Distribution}
-import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}

 /** Page showing statistics and task list for a given stage */
 private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
@@ -52,14 +52,22 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
    val stageAttemptId = parameterAttempt.toInt
    val stageDataOption = progressListener.stageIdToData.get((stageId, stageAttemptId))

-    if (stageDataOption.isEmpty || stageDataOption.get.taskData.isEmpty) {
+    val stageHeader = s"Details for Stage $stageId (Attempt $stageAttemptId)"
+    if (stageDataOption.isEmpty) {
+      val content =
+        <div id="no-info">
+          <p>No information to display for Stage {stageId} (Attempt {stageAttemptId})</p>
+        </div>
+      return UIUtils.headerSparkPage(stageHeader, content, parent)
+
+    }
+    if (stageDataOption.get.taskData.isEmpty) {
      val content =
        <div>
          <h4>Summary Metrics</h4> No tasks have started yet
          <h4>Tasks</h4> No tasks have started yet
        </div>
-      return UIUtils.headerSparkPage(
-        s"Details for Stage $stageId (Attempt $stageAttemptId)", content, parent)
+      return UIUtils.headerSparkPage(stageHeader, content, parent)
    }

    val stageData = stageDataOption.get
@@ -458,8 +466,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
      maybeAccumulableTable ++
      <h4>Tasks</h4> ++ taskTable

-    UIUtils.headerSparkPage(
-      "Details for Stage %d".format(stageId), content, parent, showVisualization = true)
+    UIUtils.headerSparkPage(stageHeader, content, parent, showVisualization = true)
  }
}
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
index 199f731b92bcc..05f94a7507f4f 100644
--- a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
@@ -21,8 +21,8 @@ import javax.servlet.http.HttpServletRequest

 import scala.xml.Node

-import org.apache.spark.storage.{BlockId, BlockStatus, StorageStatus, StorageUtils}
-import org.apache.spark.ui.{WebUIPage, UIUtils}
+import org.apache.spark.status.api.v1.{AllRDDResource, RDDDataDistribution, RDDPartitionInfo}
+import org.apache.spark.ui.{UIUtils, WebUIPage}
 import org.apache.spark.util.Utils

 /** Page showing storage details for a given RDD */
@@ -32,28 +32,19 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
  def render(request: HttpServletRequest): Seq[Node] = {
    val parameterId = request.getParameter("id")
    require(parameterId != null && parameterId.nonEmpty, "Missing id parameter")
-
    val rddId = parameterId.toInt
-    val storageStatusList = listener.storageStatusList
-    val rddInfo = listener.rddInfoList.find(_.id == rddId).getOrElse {
-      // Rather than crashing, render an "RDD Not Found" page
-      return UIUtils.headerSparkPage("RDD Not Found", Seq[Node](), parent)
-    }
+    val rddStorageInfo = AllRDDResource.getRDDStorageInfo(rddId, listener, includeDetails = true)
+      .getOrElse {
+        // Rather than crashing, render an "RDD Not Found" page
+        return UIUtils.headerSparkPage("RDD Not Found", Seq[Node](), parent)
+      }

    // Worker table
-    val workers = storageStatusList.map((rddId, _))
-    val workerTable = UIUtils.listingTable(workerHeader, workerRow, workers,
-      id = Some("rdd-storage-by-worker-table"))
+    val workerTable = UIUtils.listingTable(workerHeader, workerRow,
+      rddStorageInfo.dataDistribution.get, id = Some("rdd-storage-by-worker-table"))

    // Block table
-    val blockLocations = StorageUtils.getRddBlockLocations(rddId, storageStatusList)
-    val blocks = storageStatusList
-      .flatMap(_.rddBlocksById(rddId))
-      .sortWith(_._1.name < _._1.name)
-      .map { case (blockId, status) =>
-        (blockId, status, blockLocations.get(blockId).getOrElse(Seq[String]("Unknown")))
-      }
-    val blockTable = UIUtils.listingTable(blockHeader, blockRow, blocks,
+    val blockTable = UIUtils.listingTable(blockHeader, blockRow, rddStorageInfo.partitions.get,
      id = Some("rdd-storage-by-block-table"))

    val content =
@@ -62,23 +53,23 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
          <li>
            <strong>Storage Level:</strong>
-            {rddInfo.storageLevel.description}
+            {rddStorageInfo.storageLevel}
          </li>
          <li>
            <strong>Cached Partitions:</strong>
-            {rddInfo.numCachedPartitions}
+            {rddStorageInfo.numCachedPartitions}
          </li>
          <li>
            <strong>Total Partitions:</strong>
-            {rddInfo.numPartitions}
+            {rddStorageInfo.numPartitions}
          </li>
          <li>
            <strong>Memory Size:</strong>
-            {Utils.bytesToString(rddInfo.memSize)}
+            {Utils.bytesToString(rddStorageInfo.memoryUsed)}
          </li>
          <li>
            <strong>Disk Size:</strong>
-            {Utils.bytesToString(rddInfo.diskSize)}
+            {Utils.bytesToString(rddStorageInfo.diskUsed)}
          </li>
      @@ -86,19 +77,19 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
-          <h4>
-            Data Distribution on {workers.size} Executors
-          </h4>
+          <h4>
+            Data Distribution on {rddStorageInfo.dataDistribution.size} Executors
+          </h4>
           {workerTable}
-          <h4>
-            {blocks.size} Partitions
-          </h4>
+          <h4>
+            {rddStorageInfo.partitions.size} Partitions
+          </h4>
           {blockTable}
      ; - UIUtils.headerSparkPage("RDD Storage Info for " + rddInfo.name, content, parent) + UIUtils.headerSparkPage("RDD Storage Info for " + rddStorageInfo.name, content, parent) } /** Header fields for the worker table */ @@ -116,34 +107,32 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") { "Executors") /** Render an HTML row representing a worker */ - private def workerRow(worker: (Int, StorageStatus)): Seq[Node] = { - val (rddId, status) = worker + private def workerRow(worker: RDDDataDistribution): Seq[Node] = {
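The new row renderers take the v1 API types directly instead of raw storage tuples. A minimal sketch of what such renderers look like, assuming only the fields the v1 types expose in the JSON fixtures later in this patch (address, memoryUsed, memoryRemaining, diskUsed on a distribution; blockName, storageLevel, memoryUsed, diskUsed, executors on a partition); this is illustrative, not the patch's verbatim markup:

    // Illustrative sketch only: one worker row from the v1 RDDDataDistribution
    // type and one block row from RDDPartitionInfo, using just the fields
    // visible in the JSON fixtures in this patch. Assumes the file's existing
    // imports (scala.xml.Node, org.apache.spark.util.Utils).
    private def workerRowSketch(worker: RDDDataDistribution): Seq[Node] = {
      <tr>
        <td>{worker.address}</td>
        <td>
          {Utils.bytesToString(worker.memoryUsed)}
          ({Utils.bytesToString(worker.memoryRemaining)} Remaining)
        </td>
        <td>{Utils.bytesToString(worker.diskUsed)}</td>
      </tr>
    }

    private def blockRowSketch(row: RDDPartitionInfo): Seq[Node] = {
      <tr>
        <td>{row.blockName}</td>
        <td>{row.storageLevel}</td>
        <td>{Utils.bytesToString(row.memoryUsed)}</td>
        <td>{Utils.bytesToString(row.diskUsed)}</td>
        <td>{row.executors.map(host => <span>{host}<br/></span>)}</td>
      </tr>
    }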
- + - + } /** Render an HTML row representing a block */ - private def blockRow(row: (BlockId, BlockStatus, Seq[String])): Seq[Node] = { - val (id, block, locations) = row + private def blockRow(row: RDDPartitionInfo): Seq[Node] = { - + - - } diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala index 59dc6b547c1d8..07db783c572cf 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala @@ -22,7 +22,7 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node import org.apache.spark.storage.RDDInfo -import org.apache.spark.ui.{WebUIPage, UIUtils} +import org.apache.spark.ui.{UIUtils, WebUIPage} import org.apache.spark.util.Utils /** Page showing list of RDD's currently stored in the cluster */ diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala index 045bd784990d1..0351749700962 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala @@ -35,6 +35,8 @@ private[ui] class StorageTab(parent: SparkUI) extends SparkUITab(parent, "storag /** * :: DeveloperApi :: * A SparkListener that prepares information to be displayed on the BlockManagerUI. + * + * This class is thread-safe (unlike JobProgressListener) */ @DeveloperApi class StorageListener(storageStatusListener: StorageStatusListener) extends SparkListener { @@ -43,7 +45,9 @@ class StorageListener(storageStatusListener: StorageStatusListener) extends Spar def storageStatusList: Seq[StorageStatus] = storageStatusListener.storageStatusList /** Filter RDD info to include only those with cached partitions */ - def rddInfoList: Seq[RDDInfo] = _rddInfoMap.values.filter(_.numCachedPartitions > 0).toSeq + def rddInfoList: Seq[RDDInfo] = synchronized { + _rddInfoMap.values.filter(_.numCachedPartitions > 0).toSeq + } /** Update the storage info of the RDDs whose blocks are among the given updated blocks */ private def updateRDDInfo(updatedBlocks: Seq[(BlockId, BlockStatus)]): Unit = { diff --git a/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json new file mode 100644 index 0000000000000..ce4fe80b66aa5 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json @@ -0,0 +1,53 @@ +[ { + "id" : "local-1430917381534", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-05-06T13:03:00.893GMT", + "endTime" : "2015-05-06T13:03:11.398GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1425081759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-28T00:02:38.277GMT", + "endTime" : "2015-02-28T00:02:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981780767", + "name" : "Spark shell", + 
"attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json new file mode 100644 index 0000000000000..31ac9beea8788 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json @@ -0,0 +1,67 @@ +[ { + "status" : "COMPLETE", + "stageId" : 3, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 162, + "inputBytes" : 160, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :17", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at :14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 4338, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :15", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json new file mode 100644 index 0000000000000..ce4fe80b66aa5 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json @@ -0,0 +1,53 @@ +[ { + "id" : "local-1430917381534", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-05-06T13:03:00.893GMT", + "endTime" : "2015-05-06T13:03:11.398GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1425081759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-28T00:02:38.277GMT", + "endTime" : "2015-02-28T00:02:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + 
"sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json new file mode 100644 index 0000000000000..cb622e147249e --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json @@ -0,0 +1,17 @@ +[ { + "id" : "", + "hostPort" : "localhost:57971", + "rddBlocks" : 8, + "memoryUsed" : 28000128, + "diskUsed" : 0, + "activeTasks" : 0, + "failedTasks" : 1, + "completedTasks" : 31, + "totalTasks" : 32, + "totalDuration" : 8820, + "totalInputBytes" : 28000288, + "totalShuffleRead" : 0, + "totalShuffleWrite" : 13180, + "maxMemory" : 278302556, + "executorLogs" : { } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json new file mode 100644 index 0000000000000..bff6a4f69d077 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json @@ -0,0 +1,23 @@ +[ { + "status" : "FAILED", + "stageId" : 2, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 7, + "numFailedTasks" : 1, + "executorRunTime" : 278, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :20", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json new file mode 100644 index 0000000000000..2e92e1fa0ec23 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json @@ -0,0 +1,15 @@ +[ { + "jobId" : 0, + "name" : "foreach at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + 
"numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json new file mode 100644 index 0000000000000..2e92e1fa0ec23 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json @@ -0,0 +1,15 @@ +[ { + "jobId" : 0, + "name" : "foreach at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json new file mode 100644 index 0000000000000..cab4750270dfa --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json @@ -0,0 +1,43 @@ +[ { + "jobId" : 2, + "name" : "count at :17", + "stageIds" : [ 3 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +}, { + "jobId" : 1, + "name" : "count at :20", + "stageIds" : [ 1, 2 ], + "status" : "FAILED", + "numTasks" : 16, + "numActiveTasks" : 0, + "numCompletedTasks" : 15, + "numSkippedTasks" : 15, + "numFailedTasks" : 1, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 1 +}, { + "jobId" : 0, + "name" : "count at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json new file mode 100644 index 0000000000000..483632a3956ed --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json @@ -0,0 +1,10 @@ +[ { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json new file mode 100644 index 0000000000000..4b85690fd9199 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json @@ -0,0 +1,19 @@ +[ { + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : 
"local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json new file mode 100644 index 0000000000000..dca86fe5f7e6a --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json @@ -0,0 +1,35 @@ +[ { + "id" : "local-1430917381534", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-05-06T13:03:00.893GMT", + "endTime" : "2015-05-06T13:03:11.398GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1425081759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-28T00:02:38.277GMT", + "endTime" : "2015-02-28T00:02:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json new file mode 100644 index 0000000000000..07489ad96414a --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json @@ -0,0 +1,10 @@ +{ + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json new file mode 100644 index 0000000000000..8f3d7160c723f --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json @@ -0,0 +1,17 @@ +{ + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json new file mode 100644 index 0000000000000..4a29072bdb6e4 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json @@ -0,0 +1,15 @@ +{ + "jobId" : 0, + "name" : "count at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + 
"numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json new file mode 100644 index 0000000000000..38b5328ffbb03 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json @@ -0,0 +1,64 @@ +{ + "id" : 0, + "name" : "0", + "numPartitions" : 8, + "numCachedPartitions" : 8, + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 28000128, + "diskUsed" : 0, + "dataDistribution" : [ { + "address" : "localhost:57971", + "memoryUsed" : 28000128, + "memoryRemaining" : 250302428, + "diskUsed" : 0 + } ], + "partitions" : [ { + "blockName" : "rdd_0_0", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_1", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_2", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_3", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_4", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_5", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_6", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_7", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + } ] +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json new file mode 100644 index 0000000000000..111cb8163eb3d --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -0,0 +1,270 @@ +{ + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at :14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ], + "tasks" : { + "8" : { + "taskId" : 8, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.829GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 94000, + "recordsWritten" : 0 + } + } + }, + "11" : { + "taskId" : 11, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1647, + "writeTime" : 83000, + "recordsWritten" : 0 + } + } + }, + "14" : { + "taskId" : 14, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.832GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 88000, + "recordsWritten" : 0 + } + } + }, + "13" : { + "taskId" : 13, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 73000, + "recordsWritten" : 0 + } + } + }, + "10" : { + "taskId" : 10, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + 
"jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 76000, + "recordsWritten" : 0 + } + } + }, + "9" : { + "taskId" : 9, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 436, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 98000, + "recordsWritten" : 0 + } + } + }, + "12" : { + "taskId" : 12, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1645, + "writeTime" : 101000, + "recordsWritten" : 0 + } + } + }, + "15" : { + "taskId" : 15, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.833GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 79000, + "recordsWritten" : 0 + } + } + } + }, + "executorSummary" : { + "" : { + "taskTime" : 3624, + "failedTasks" : 0, + "succeededTasks" : 8, + "inputBytes" : 28000128, + "outputBytes" : 0, + "shuffleRead" : 0, + "shuffleWrite" : 13180, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json new file mode 100644 index 0000000000000..ef339f89afa45 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -0,0 +1,270 @@ +[ { + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at :14", + "details" : 
"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ], + "tasks" : { + "8" : { + "taskId" : 8, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.829GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 94000, + "recordsWritten" : 0 + } + } + }, + "11" : { + "taskId" : 11, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1647, + "writeTime" : 83000, + "recordsWritten" : 0 + } + } + }, + "14" : { + "taskId" : 14, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.832GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 88000, + "recordsWritten" : 0 + } + } + }, + "13" : { + "taskId" : 13, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 73000, + "recordsWritten" : 0 + } + } + 
}, + "10" : { + "taskId" : 10, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 76000, + "recordsWritten" : 0 + } + } + }, + "9" : { + "taskId" : 9, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 436, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 98000, + "recordsWritten" : 0 + } + } + }, + "12" : { + "taskId" : 12, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1645, + "writeTime" : 101000, + "recordsWritten" : 0 + } + } + }, + "15" : { + "taskId" : 15, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.833GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 79000, + "recordsWritten" : 0 + } + } + } + }, + "executorSummary" : { + "" : { + "taskTime" : 3624, + "failedTasks" : 0, + "succeededTasks" : 8, + "inputBytes" : 28000128, + "outputBytes" : 0, + "shuffleRead" : 0, + "shuffleWrite" : 13180, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json new file mode 100644 index 0000000000000..f79a31022d214 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json @@ -0,0 +1,9 @@ +[ { + "id" : 0, + "name" : "0", + "numPartitions" : 8, + "numCachedPartitions" : 8, + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 28000128, + "diskUsed" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json 
b/core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json new file mode 100644 index 0000000000000..8878e547a7984 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json @@ -0,0 +1 @@ +[ ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json new file mode 100644 index 0000000000000..056fac7088594 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json @@ -0,0 +1,89 @@ +[ { + "status" : "COMPLETE", + "stageId" : 3, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 162, + "inputBytes" : 160, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :17", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at :14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : 
"default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 4338, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :15", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "FAILED", + "stageId" : 2, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 7, + "numFailedTasks" : 1, + "executorRunTime" : 278, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :20", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json new file mode 100644 index 0000000000000..79ccacd309693 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json @@ -0,0 +1,27 @@ +[ { + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 
120, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "foreach at :15", + "details" : "org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "value" : "5050" + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json new file mode 100644 index 0000000000000..f2cb29b31c85f --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.494GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 49294, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3842811, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.502GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3934399, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.503GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 348, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + 
"shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89885, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 1311694, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 83022, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 30, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3675510, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 29, + "executorRunTime" : 351, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 4016617, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.506GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 2579051, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 8, + "index" : 8, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.914GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + 
"executorDeserializeTime" : 3, + "executorRunTime" : 80, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 121551, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 9, + "index" : 9, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.915GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60489, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 10, + "index" : 10, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.916GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 8, + "executorRunTime" : 73, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94709, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 11, + "index" : 11, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.918GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 75, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94507, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +}, { + "taskId" 
: 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 15, + "index" : 15, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.928GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 602780, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 
0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json new file mode 100644 index 0000000000000..c3febc5fc9447 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json @@ -0,0 +1,193 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.515GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "78", + "value" : "5050" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.521GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "247", + "value" : "2175" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "378", + "value" : "378" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "572", + "value" : "950" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "678", + "value" : "2853" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + 
"speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "897", + "value" : "3750" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "978", + "value" : "1928" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.524GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "1222", + "value" : "4972" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json new file mode 100644 index 0000000000000..56d667d88917c --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json @@ -0,0 +1,193 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.515GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "78", + "value" : "5050" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.521GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "247", + "value" : "2175" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "378", + "value" : "378" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : 
"2015-03-17T23:12:16.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "572", + "value" : "950" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "678", + "value" : "2853" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "897", + "value" : "3750" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "978", + "value" : "1928" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.524GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "1222", + "value" : "4972" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json new file mode 100644 index 0000000000000..e5ec3bc4c7126 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json @@ -0,0 +1,1401 @@ +[ { + "taskId" : 10, + "index" : 10, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.916GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 8, + "executorRunTime" : 73, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + 
"shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94709, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 11, + "index" : 11, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.918GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 75, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94507, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 15, + "index" : 15, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.928GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 602780, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + 
"executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 20, + "index" : 20, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.014GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97716, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 21, + "index" : 21, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.015GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 88, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100270, + "recordsWritten" : 10 + } + } +}, { + 
"taskId" : 22, + "index" : 22, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.018GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 93, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 143427, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 23, + "index" : 23, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.031GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 65, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 91844, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 24, + "index" : 24, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.098GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 43, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 157194, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 25, + "index" : 25, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.103GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 49, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94134, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 26, + "index" : 26, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.105GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 38, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108213, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 27, + "index" : 27, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.110GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 32, + "resultSize" : 2065, + "jvmGcTime" : 0, + 
"resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102019, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 28, + "index" : 28, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.113GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 29, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 104299, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 29, + "index" : 29, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.114GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 39, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 114938, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 30, + "index" : 30, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.118GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 34, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 119770, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 31, + "index" : 31, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.127GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 36, + "executorRunTime" : 24, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92619, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 32, + "index" : 32, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.148GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89603, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 33, + "index" : 33, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.149GMT", + 
"executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 43, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 118329, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 34, + "index" : 34, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.156GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 27, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 127746, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 35, + "index" : 35, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.161GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 35, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 160963, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 36, + "index" : 36, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.164GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 29, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 123855, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 37, + "index" : 37, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.165GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 32, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 111869, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 38, + "index" : 38, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.166GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 31, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + 
"bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 131158, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 39, + "index" : 39, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.180GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98748, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 40, + "index" : 40, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.197GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 14, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94792, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 41, + "index" : 41, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.200GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90765, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 42, + "index" : 42, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.203GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 103713, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 43, + "index" : 43, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.204GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 171516, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 44, + "index" : 44, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.205GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + 
"accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98293, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 45, + "index" : 45, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.206GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 19, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92985, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 46, + "index" : 46, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.210GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 31, + "resultSize" : 2065, + "jvmGcTime" : 6, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 113322, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 47, + "index" : 47, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.212GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 103015, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 48, + "index" : 48, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.220GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 24, + "resultSize" : 2065, + "jvmGcTime" : 6, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 139844, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 49, + "index" : 49, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.223GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 7, + "executorRunTime" : 23, + "resultSize" : 2065, + "jvmGcTime" : 6, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 
94984, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 50, + "index" : 50, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.240GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 51, + "index" : 51, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.242GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96013, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 52, + "index" : 52, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.243GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 53, + "index" : 53, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.244GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92835, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 54, + "index" : 54, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.244GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90506, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 55, + "index" : 55, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.246GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 21, + "resultSize" : 
2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108309, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 56, + "index" : 56, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.249GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 20, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90329, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 57, + "index" : 57, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.257GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96849, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 58, + "index" : 58, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.263GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97521, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 59, + "index" : 59, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.265GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100753, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json new file mode 100644 index 0000000000000..5657123a2db15 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 29, + 
"executorRunTime" : 351, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 4016617, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 30, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3675510, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.502GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3934399, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 83022, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.506GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 2579051, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 1311694, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 0, + "index" : 0, + 
"attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.494GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 49294, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3842811, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.503GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 348, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89885, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 22, + "index" : 22, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.018GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 93, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 143427, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 21, + "index" : 21, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.015GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 88, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + 
"memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100270, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 9, + "index" : 9, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.915GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60489, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 20, + "index" : 20, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.014GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97716, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 8, + "index" : 8, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.914GMT", + "executorId" : "driver", + "host" : 
"localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 80, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 121551, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json new file mode 100644 index 0000000000000..5657123a2db15 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 29, + "executorRunTime" : 351, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 4016617, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 30, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3675510, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 1, + "index" : 
1, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.502GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3934399, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 83022, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.506GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 2579051, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 1311694, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.494GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 49294, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3842811, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.503GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 348, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + 
"memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89885, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 22, + "index" : 22, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.018GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 93, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 143427, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 21, + "index" : 21, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.015GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 88, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100270, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : 
"localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 9, + "index" : 9, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.915GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60489, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 20, + "index" : 20, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.014GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97716, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 8, + "index" : 8, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.914GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 80, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 121551, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + 
"shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json new file mode 100644 index 0000000000000..72fe017e9f85d --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 40, + "index" : 40, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.197GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 14, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94792, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 86, + "index" : 86, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.374GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95848, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 41, + "index" : 41, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.200GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90765, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 68, + "index" : 68, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.306GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + 
"diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101750, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 58, + "index" : 58, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.263GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97521, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 43, + "index" : 43, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.204GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 171516, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 57, + "index" : 57, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.257GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96849, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 59, + "index" : 59, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.265GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100753, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 32, + "index" : 32, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.148GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89603, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 87, + "index" : 87, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.374GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : 
"PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102159, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 99, + "index" : 99, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.426GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70565, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 133964, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 63, + "index" : 63, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.276GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 20, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102779, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 90, + "index" : 90, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.385GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98472, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 39, + "index" : 39, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.180GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98748, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 42, + "index" : 42, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.203GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : 
{ + "bytesWritten" : 1710, + "writeTime" : 103713, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 51, + "index" : 51, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.242GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96013, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 50, + "index" : 50, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.240GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 53, + "index" : 53, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.244GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92835, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 44, + "index" : 44, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.205GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98293, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 80, + "index" : 80, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.341GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98069, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json new file mode 100644 index 
0000000000000..bc3c302813de2 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json @@ -0,0 +1,19 @@ +{ + "quantiles" : [ 0.01, 0.5, 0.99 ], + "executorDeserializeTime" : [ 1.0, 3.0, 36.0 ], + "executorRunTime" : [ 16.0, 28.0, 351.0 ], + "resultSize" : [ 2010.0, 2065.0, 2065.0 ], + "jvmGcTime" : [ 0.0, 0.0, 7.0 ], + "resultSerializationTime" : [ 0.0, 0.0, 2.0 ], + "memoryBytesSpilled" : [ 0.0, 0.0, 0.0 ], + "diskBytesSpilled" : [ 0.0, 0.0, 0.0 ], + "inputMetrics" : { + "bytesRead" : [ 60488.0, 70564.0, 70565.0 ], + "recordsRead" : [ 10000.0, 10000.0, 10000.0 ] + }, + "shuffleWriteMetrics" : { + "writeBytes" : [ 1710.0, 1710.0, 1710.0 ], + "writeRecords" : [ 10.0, 10.0, 10.0 ], + "writeTime" : [ 89437.0, 102159.0, 4016617.0 ] + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json new file mode 100644 index 0000000000000..e084c839f1d5a --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json @@ -0,0 +1,19 @@ +{ + "quantiles" : [ 0.05, 0.25, 0.5, 0.75, 0.95 ], + "executorDeserializeTime" : [ 1.0, 2.0, 2.0, 2.0, 3.0 ], + "executorRunTime" : [ 30.0, 74.0, 75.0, 76.0, 79.0 ], + "resultSize" : [ 1034.0, 1034.0, 1034.0, 1034.0, 1034.0 ], + "jvmGcTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "resultSerializationTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "memoryBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "shuffleReadMetrics" : { + "readBytes" : [ 17100.0, 17100.0, 17100.0, 17100.0, 17100.0 ], + "readRecords" : [ 100.0, 100.0, 100.0, 100.0, 100.0 ], + "remoteBlocksFetched" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "localBlocksFetched" : [ 100.0, 100.0, 100.0, 100.0, 100.0 ], + "fetchWaitTime" : [ 0.0, 0.0, 0.0, 1.0, 1.0 ], + "remoteBytesRead" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "totalBlocksFetched" : [ 100.0, 100.0, 100.0, 100.0, 100.0 ] + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json new file mode 100644 index 0000000000000..6ac7811ce691b --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json @@ -0,0 +1,19 @@ +{ + "quantiles" : [ 0.05, 0.25, 0.5, 0.75, 0.95 ], + "executorDeserializeTime" : [ 2.0, 2.0, 3.0, 7.0, 31.0 ], + "executorRunTime" : [ 16.0, 18.0, 28.0, 49.0, 349.0 ], + "resultSize" : [ 2010.0, 2065.0, 2065.0, 2065.0, 2065.0 ], + "jvmGcTime" : [ 0.0, 0.0, 0.0, 5.0, 7.0 ], + "resultSerializationTime" : [ 0.0, 0.0, 0.0, 0.0, 1.0 ], + "memoryBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "inputMetrics" : { + "bytesRead" : [ 60488.0, 70564.0, 70564.0, 70564.0, 70564.0 ], + "recordsRead" : [ 10000.0, 10000.0, 10000.0, 10000.0, 10000.0 ] + }, + "shuffleWriteMetrics" : { + "writeBytes" : [ 1710.0, 1710.0, 1710.0, 1710.0, 1710.0 ], + "writeRecords" : [ 10.0, 10.0, 10.0, 10.0, 10.0 ], + "writeTime" : [ 90329.0, 95848.0, 102159.0, 121551.0, 2579051.0 ] + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json 
b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json new file mode 100644 index 0000000000000..32d5731676ad5 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -0,0 +1,242 @@ +{ + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 120, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "foreach at <console>:15", + "details" : "org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>()\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "value" : "5050" + } ], + "tasks" : { + "2" : { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "378", + "value" : "378" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "5" : { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "897", + "value" : "3750" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "4" : { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "678", + "value" : "2853" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "7" : { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.524GMT", + "executorId" :
"<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "1222", + "value" : "4972" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "1" : { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.521GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "247", + "value" : "2175" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "3" : { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "572", + "value" : "950" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "6" : { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "978", + "value" : "1928" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "0" : { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.515GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "78", + "value" : "5050" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } + }, + "executorSummary" : { + "<driver>" : { + "taskTime" : 418, + "failedTasks" : 0, + "succeededTasks" : 8, + "inputBytes" : 0, + "outputBytes" : 0, + "shuffleRead" : 0, + "shuffleWrite" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json new file mode 100644 index 0000000000000..cab4750270dfa --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json @@ -0,0 +1,43 @@ +[ { + "jobId" : 2, + "name" : "count at <console>:17", + "stageIds" : [ 3 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, +
"numFailedStages" : 0 +}, { + "jobId" : 1, + "name" : "count at <console>:20", + "stageIds" : [ 1, 2 ], + "status" : "FAILED", + "numTasks" : 16, + "numActiveTasks" : 0, + "numCompletedTasks" : 15, + "numSkippedTasks" : 15, + "numFailedTasks" : 1, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 1 +}, { + "jobId" : 0, + "name" : "count at <console>:15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json new file mode 100644 index 0000000000000..6fd25befbf7e8 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json @@ -0,0 +1,29 @@ +[ { + "jobId" : 2, + "name" : "count at <console>:17", + "stageIds" : [ 3 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +}, { + "jobId" : 0, + "name" : "count at <console>:15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/spark-events/local-1422981759269/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1422981759269/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 new file mode 100755 index 0000000000000..4794e56d1107a --- /dev/null +++ b/core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 @@ -0,0 +1,88 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"<driver>","Host":"localhost","Port":57967},"Maximum Memory":278302556,"Timestamp":1422981759407} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","Java Version":"1.7.0_67 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.103","spark.eventLog.enabled":"true","spark.driver.port":"57965","spark.repl.class.uri":"http://192.168.1.103:57964","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.103:57966","spark.tachyonStore.folderName":"spark-fd6c823a-8a18-4113-8306-1fa7bb623a7f","spark.app.id":"local-1422981759269"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.7","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle
Corporation","java.vm.specification.version":"1.7","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"24.65-b04","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.7.0_67-b01","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"51.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","java.version":"1.7.0_67","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1422981759269","Timestamp":1422981758277,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981762075,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981762082,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981762083,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981762085,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762632,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_6","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762633,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":520,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_2","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task 
End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981762082,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_3","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_5","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981762083,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762635,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_4","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981762075,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981762085,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize 
Time":9,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_7","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762637,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":8,"Memory Size":28000128,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981762069,"Completion Time":1422981762637,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[1,2]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764001,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization 
Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":138000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":106000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1645,"Shuffle Write Time":99000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":123000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":108000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764004,"Failed":false,"Accumulables":[]},"Task 
Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1647,"Shuffle Write Time":97000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764004,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":132000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764005,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":81000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981763578,"Completion Time":1422981764005,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage 
ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981764014,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981764014,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":28,"Result Size":1013,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981764014,"Completion Time":1422981764045,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":2,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[3,4]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task 
ID":21,"Index":4,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1422981764399,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1422981764399,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764642,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":4,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1422981764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764648,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":247,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory 
Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981764396,"Completion Time":1422981764648,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":3,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[5]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD 
ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":7,"Attempt":0,"Launch Time":1422981765029,"Executor 
ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":32,"Index":7,"Attempt":0,"Launch Time":1422981765029,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":14,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765049,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981765026,"Completion Time":1422981765050,"Accumulables":[]}} 
+{"Event":"SparkListenerJobEnd","Job ID":3,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1422981766912} diff --git a/core/src/test/resources/spark-events/local-1422981759269/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1422981759269/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1422981780767/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1422981780767/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 new file mode 100755 index 0000000000000..f14a000bf2c28 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 @@ -0,0 +1,82 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"","Host":"localhost","Port":57971},"Maximum Memory":278302556,"Timestamp":1422981780906} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","Java Version":"1.7.0_67 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.103","spark.eventLog.enabled":"true","spark.driver.port":"57969","spark.repl.class.uri":"http://192.168.1.103:57968","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.103:57970","spark.tachyonStore.folderName":"spark-3f19daee-844c-41d0-a3fc-5e3e508f9731","spark.app.id":"local-1422981780767"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.7","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle Corporation","java.vm.specification.version":"1.7","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"24.65-b04","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.7.0_67-b01","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"51.0","java.specification.name":"Java Platform API 
Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","java.version":"1.7.0_67","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1422981780767","Timestamp":1422981779720,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981784234,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task 
Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981784243,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784812,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":543,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_3","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784814,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981784234,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784816,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981784243,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1422981784816,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":543,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_7","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784817,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":541,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_5","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784817,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_4","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784818,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":543,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_6","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784818,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_2","Status":{"Storage Level":{"Use 
Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":8,"Memory Size":28000128,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981784228,"Completion Time":1422981784819,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at :20","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon 
Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[1,2]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981785829,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task 
ID":10,"Index":2,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981785832,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981785833,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981785832,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786282,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":88000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981785829,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786283,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":435,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":94000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981785833,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1422981786283,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":435,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":79000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786283,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":73000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786284,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1647,"Shuffle Write Time":83000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786284,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":436,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":98000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786285,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1645,"Shuffle Write Time":101000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":10,"Index":2,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786286,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":76000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981785829,"Completion Time":1422981786286,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at :20","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981786296,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":2,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":3,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":4,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":5,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":6,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":7,"Attempt":0,"Launch Time":1422981786299,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":4,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786337,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":34,"Result Size":862,"JVM GC Time":0,"Result Serialization 
Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":5,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786339,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":1,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":2,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":34,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981786296,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":7,"Attempt":0,"Launch Time":1422981786299,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786341,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes 
Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":6,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786342,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":34,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"ExceptionFailure","Class Name":"java.lang.RuntimeException","Description":"got a 3, failing","Stack Trace":[{"Declaring Class":"$line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1","Method Name":"apply","File Name":"","Line Number":18},{"Declaring Class":"$line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1","Method Name":"apply","File Name":"","Line Number":17},{"Declaring Class":"scala.collection.Iterator$$anon$11","Method Name":"next","File Name":"Iterator.scala","Line Number":328},{"Declaring Class":"org.apache.spark.util.Utils$","Method Name":"getIteratorSize","File Name":"Utils.scala","Line Number":1311},{"Declaring Class":"org.apache.spark.rdd.RDD$$anonfun$count$1","Method Name":"apply","File Name":"RDD.scala","Line Number":910},{"Declaring Class":"org.apache.spark.rdd.RDD$$anonfun$count$1","Method Name":"apply","File Name":"RDD.scala","Line Number":910},{"Declaring Class":"org.apache.spark.SparkContext$$anonfun$runJob$4","Method Name":"apply","File Name":"SparkContext.scala","Line Number":1314},{"Declaring Class":"org.apache.spark.SparkContext$$anonfun$runJob$4","Method Name":"apply","File Name":"SparkContext.scala","Line Number":1314},{"Declaring Class":"org.apache.spark.scheduler.ResultTask","Method Name":"runTask","File Name":"ResultTask.scala","Line Number":61},{"Declaring Class":"org.apache.spark.scheduler.Task","Method Name":"run","File Name":"Task.scala","Line Number":56},{"Declaring Class":"org.apache.spark.executor.Executor$TaskRunner","Method Name":"run","File Name":"Executor.scala","Line Number":196},{"Declaring Class":"java.util.concurrent.ThreadPoolExecutor","Method Name":"runWorker","File Name":"ThreadPoolExecutor.java","Line Number":1145},{"Declaring Class":"java.util.concurrent.ThreadPoolExecutor$Worker","Method Name":"run","File Name":"ThreadPoolExecutor.java","Line Number":615},{"Declaring Class":"java.lang.Thread","Method Name":"run","File Name":"Thread.java","Line Number":745}],"Full Stack Trace":"java.lang.RuntimeException: got a 3, failing\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:18)\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:17)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1311)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat 
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:56)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\n","Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":36,"Result Size":0,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}},"Task Info":{"Task ID":19,"Index":3,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786343,"Failed":true,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":36,"Result Size":0,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at :20","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981786296,"Completion Time":1422981786347,"Failure Reason":"Job aborted due to stage failure: Task 3 in stage 2.0 failed 1 times, most recent failure: Lost task 3.0 in stage 2.0 (TID 19, localhost): java.lang.RuntimeException: got a 3, failing\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:18)\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:17)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1311)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat 
org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:56)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\n\nDriver stacktrace:","Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Job Result":{"Result":"JobFailed","Exception":{"Message":"Job aborted due to stage failure: Task 3 in stage 2.0 failed 1 times, most recent failure: Lost task 3.0 in stage 2.0 (TID 19, localhost): java.lang.RuntimeException: got a 3, failing\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:18)\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:17)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1311)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:56)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\n\nDriver stacktrace:","Stack Trace":[{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler","Method Name":"org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages","File Name":"DAGScheduler.scala","Line Number":1214},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":1203},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":1202},{"Declaring Class":"scala.collection.mutable.ResizableArray$class","Method Name":"foreach","File Name":"ResizableArray.scala","Line Number":59},{"Declaring Class":"scala.collection.mutable.ArrayBuffer","Method Name":"foreach","File Name":"ArrayBuffer.scala","Line Number":47},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler","Method Name":"abortStage","File Name":"DAGScheduler.scala","Line Number":1202},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":696},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":696},{"Declaring Class":"scala.Option","Method Name":"foreach","File Name":"Option.scala","Line Number":236},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler","Method Name":"handleTaskSetFailed","File Name":"DAGScheduler.scala","Line Number":696},{"Declaring 
Class":"org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2","Method Name":"applyOrElse","File Name":"DAGScheduler.scala","Line Number":1420},{"Declaring Class":"akka.actor.Actor$class","Method Name":"aroundReceive","File Name":"Actor.scala","Line Number":465},{"Declaring Class":"org.apache.spark.scheduler.DAGSchedulerEventProcessActor","Method Name":"aroundReceive","File Name":"DAGScheduler.scala","Line Number":1375},{"Declaring Class":"akka.actor.ActorCell","Method Name":"receiveMessage","File Name":"ActorCell.scala","Line Number":516},{"Declaring Class":"akka.actor.ActorCell","Method Name":"invoke","File Name":"ActorCell.scala","Line Number":487},{"Declaring Class":"akka.dispatch.Mailbox","Method Name":"processMailbox","File Name":"Mailbox.scala","Line Number":238},{"Declaring Class":"akka.dispatch.Mailbox","Method Name":"run","File Name":"Mailbox.scala","Line Number":220},{"Declaring Class":"akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask","Method Name":"exec","File Name":"AbstractDispatcher.scala","Line Number":393},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinTask","Method Name":"doExec","File Name":"ForkJoinTask.java","Line Number":260},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinPool$WorkQueue","Method Name":"runTask","File Name":"ForkJoinPool.java","Line Number":1339},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinPool","Method Name":"runWorker","File Name":"ForkJoinPool.java","Line Number":1979},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinWorkerThread","Method Name":"run","File Name":"ForkJoinWorkerThread.java","Line Number":107}]}}} +{"Event":"SparkListenerJobStart","Job ID":2,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":6,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[3]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD 
ID":6,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":0,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":1,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":2,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":3,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":4,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":5,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":6,"Attempt":0,"Launch Time":1422981787194,"Executor 
ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":7,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":1,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787222,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":20,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":0,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787223,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":20,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":2,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787223,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":22,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":7,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787223,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":19,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":5,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787224,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":19,"Result Size":1812,"JVM GC Time":0,"Result 
Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":3,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787224,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":19,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":4,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787225,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":22,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":6,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787225,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":21,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":6,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981787191,"Completion Time":1422981787226,"Accumulables":[]}}
+{"Event":"SparkListenerJobEnd","Job ID":2,"Job Result":{"Result":"JobSucceeded"}}
+{"Event":"SparkListenerApplicationEnd","Timestamp":1422981788731}
diff --git a/core/src/test/resources/spark-events/local-1422981780767/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1422981780767/SPARK_VERSION_1.2.0
new file mode 100755
index 0000000000000..e69de29bb2d1d
diff --git a/core/src/test/resources/spark-events/local-1425081759269/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1425081759269/APPLICATION_COMPLETE
new file mode 100755
index 0000000000000..e69de29bb2d1d
diff --git a/core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1
new file mode 100755
index 0000000000000..9745b36b09e44
--- /dev/null
+++ b/core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1
@@ -0,0 +1,88 @@
+{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"<driver>","Host":"localhost","Port":57967},"Maximum Memory":278302556,"Timestamp":1425081759407}
+{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","Java Version":"1.7.0_67 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.103","spark.eventLog.enabled":"true","spark.driver.port":"57965","spark.repl.class.uri":"http://192.168.1.103:57964","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.103:57966","spark.tachyonStore.folderName":"spark-fd6c823a-8a18-4113-8306-1fa7bb623a7f","spark.app.id":"local-1425081759269"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.7","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle
Corporation","java.vm.specification.version":"1.7","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"24.65-b04","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.7.0_67-b01","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"51.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","java.version":"1.7.0_67","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1425081759269","Timestamp":1425081758277,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1425081762075,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1425081762082,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1425081762083,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1425081762085,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762632,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_6","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762633,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":520,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_2","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task 
End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1425081762082,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_3","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_5","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1425081762083,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762635,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_4","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1425081762075,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1425081762085,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize 
Time":9,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_7","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762637,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":8,"Memory Size":28000128,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081762069,"Completion Time":1425081762637,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[1,2]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764001,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization 
Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":138000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":106000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1645,"Shuffle Write Time":99000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":123000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":108000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764004,"Failed":false,"Accumulables":[]},"Task 
Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1647,"Shuffle Write Time":97000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764004,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":132000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764005,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":81000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081763578,"Completion Time":1425081764005,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage 
ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1425081764014,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1425081764014,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":28,"Result Size":1013,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081764014,"Completion Time":1425081764045,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":2,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[3,4]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task 
ID":21,"Index":4,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1425081764399,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1425081764399,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764642,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":4,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1425081764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764648,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":247,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory 
Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081764396,"Completion Time":1425081764648,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":3,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[5]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD 
ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":7,"Attempt":0,"Launch Time":1425081765029,"Executor 
ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":32,"Index":7,"Attempt":0,"Launch Time":1425081765029,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":14,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765049,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081765026,"Completion Time":1425081765050,"Accumulables":[]}} 
+{"Event":"SparkListenerJobEnd","Job ID":3,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1425081766912} diff --git a/core/src/test/resources/spark-events/local-1425081759269/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1425081759269/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426533911241/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1426533911241/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 new file mode 100755 index 0000000000000..9ef5bd5d92de5 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 @@ -0,0 +1,24 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"","Host":"localhost","Port":58610},"Maximum Memory":278019440,"Timestamp":1426533911361} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","Java Version":"1.8.0_25 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.105","spark.eventLog.enabled":"true","spark.driver.port":"58608","spark.repl.class.uri":"http://192.168.1.105:58607","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.105:58609","spark.tachyonStore.folderName":"spark-5e9b7f26-8e97-4b43-82d6-25c141530da9","spark.app.id":"local-1426533911241"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle Corporation","java.vm.specification.version":"1.8","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"25.25-b02","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_25-b17","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","java.specification.name":"Java Platform API 
Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","java.version":"1.8.0_25","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/etc/hadoop":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1426533911241","Timestamp":1426533910242,"User":"irashid","App Attempt ID":"1"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426533936515,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426533936521,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task
Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426533936524,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936570,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"378","Value":"378"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936572,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"572","Value":"950"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"978","Value":"1928"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426533936521,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"247","Value":"2175"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":4,"Index":4,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"678","Value":"2853"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"897","Value":"3750"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426533936524,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"1222","Value":"4972"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426533936515,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936576,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"78","Value":"5050"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1426533936103,"Completion Time":1426533936579,"Accumulables":[{"ID":1,"Name":"my counter","Value":"5050"}]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1426533945177} diff --git a/core/src/test/resources/spark-events/local-1426533911241/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1426533911241/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426633911242/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1426633911242/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 new file mode 100755 index 0000000000000..e7043282107d3 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 @@ -0,0 +1,24 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"<driver>","Host":"localhost","Port":58610},"Maximum Memory":278019440,"Timestamp":1426633911361} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","Java Version":"1.8.0_25 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.105","spark.eventLog.enabled":"true","spark.driver.port":"58608","spark.repl.class.uri":"http://192.168.1.105:58607","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.105:58609","spark.tachyonStore.folderName":"spark-5e9b7f26-8e97-4b43-82d6-25c141530da9","spark.app.id":"local-1426633911241"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle
Corporation","java.vm.specification.version":"1.8","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"25.25-b02","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_25-b17","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","java.version":"1.8.0_25","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/etc/hadoop":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1426533911241","Timestamp":1426633910242,"User":"irashid","App Attempt ID":"2"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426633936515,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426633936521,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result 
Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426633936524,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936570,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"378","Value":"378"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936572,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"572","Value":"950"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my 
counter","Update":"978","Value":"1928"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426633936521,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"247","Value":"2175"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"678","Value":"2853"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"897","Value":"3750"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426633936524,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"1222","Value":"4972"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426633936515,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936576,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"78","Value":"5050"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage 
Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1426633936103,"Completion Time":1426633936579,"Accumulables":[{"ID":1,"Name":"my counter","Value":"5050"}]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1426633945177} diff --git a/core/src/test/resources/spark-events/local-1426633911242/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1426633911242/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1430917381534 b/core/src/test/resources/spark-events/local-1430917381534 new file mode 100755 index 0000000000000..24ac478073e3d --- /dev/null +++ b/core/src/test/resources/spark-events/local-1430917381534 @@ -0,0 +1,231 @@ +{"Event":"SparkListenerLogStart","Spark Version":"1.4.0-SNAPSHOT"} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"driver","Host":"localhost","Port":61103},"Maximum Memory":278019440,"Timestamp":1430917381651} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","Java Version":"1.8.0_25 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.102","spark.eventLog.enabled":"true","spark.driver.port":"61101","spark.repl.class.uri":"http://192.168.1.102:61100","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.eventLog.dir":"/Users/irashid/github/kraps/core/src/test/resources/spark-events","spark.fileserver.uri":"http://192.168.1.102:61102","spark.tachyonStore.folderName":"spark-aaaf41b3-d1dd-447f-8951-acf51490758b","spark.app.id":"local-1430917381534"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.8","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/github/spark","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"25.25-b02","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_25-b17","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","scala.usejavacp":"true","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --conf spark.eventLog.enabled=true --conf spark.eventLog.dir=/Users/irashid/github/kraps/core/src/test/resources/spark-events --class org.apache.spark.repl.Main spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","java.version":"1.8.0_25","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/etc/hadoop":"System Classpath","/Users/irashid/github/spark/lib_managed/jars/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/github/spark/conf/":"System Classpath","/Users/irashid/github/spark/assembly/target/scala-2.10/spark-assembly-1.4.0-SNAPSHOT-hadoop2.5.0.jar":"System Classpath","/Users/irashid/github/spark/lib_managed/jars/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/github/spark/lib_managed/jars/datanucleus-api-jdo-3.2.6.jar":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1430917381534","Timestamp":1430917380893,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Submission Time":1430917386422,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"map at :26","Number of Tasks":100,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":1,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:287)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]},{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"collect at :26","Number of Tasks":10,"RDD Info":[{"RDD ID":4,"Name":"4","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:819)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]}],"Stage IDs":[0,1],"Properties":{}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"map at <console>:26","Number of Tasks":100,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":1,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:287)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)\n$line20.$read$$iwC$$iwC$$iwC.<init>(<console>:39)\n$line20.$read$$iwC$$iwC.<init>(<console>:41)\n$line20.$read$$iwC.<init>(<console>:43)\n$line20.$read.<init>(<console>:45)\n$line20.$read$.<init>(<console>:49)\n$line20.$read$.<clinit>(<console>)\n$line20.$eval$.<init>(<console>:7)\n$line20.$eval$.<clinit>(<console>)\n$line20.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]},"Properties":{}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1430917386494,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1430917386502,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1430917386503,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch
Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1430917386506,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":8,"Attempt":0,"Launch Time":1430917386914,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":9,"Attempt":0,"Launch Time":1430917386915,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":10,"Attempt":0,"Launch Time":1430917386916,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":11,"Attempt":0,"Launch Time":1430917386918,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386919,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":30,"Executor Run Time":350,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":3675510,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End 
Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1430917386503,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386922,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":32,"Executor Run Time":348,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89885,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1430917386502,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386923,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":350,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":3934399,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386923,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":83022,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":12,"Attempt":0,"Launch Time":1430917386923,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387016,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386924,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":29,"Executor Run Time":351,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":4016617,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":13,"Attempt":0,"Launch Time":1430917386924,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387062,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":14,"Attempt":0,"Launch Time":1430917386925,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387019,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386927,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":1311694,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":15,"Attempt":0,"Launch Time":1430917386928,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387011,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1430917386494,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386929,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":32,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":3842811,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":49294,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1430917386506,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386929,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":2579051,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":16,"Attempt":0,"Launch Time":1430917387001,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387099,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage 
ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":8,"Attempt":0,"Launch Time":1430917386914,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":80,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":121551,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":17,"Attempt":0,"Launch Time":1430917387005,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387128,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":11,"Attempt":0,"Launch Time":1430917386918,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387007,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":75,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94507,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":18,"Attempt":0,"Launch Time":1430917387010,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":15,"Attempt":0,"Launch Time":1430917386928,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387011,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":76,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":602780,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":19,"Attempt":0,"Launch Time":1430917387012,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387106,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":20,"Attempt":0,"Launch Time":1430917387014,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result 
Time":0,"Finish Time":1430917387104,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":21,"Attempt":0,"Launch Time":1430917387015,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387111,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":10,"Attempt":0,"Launch Time":1430917386916,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387015,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":8,"Executor Run Time":73,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94709,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":9,"Attempt":0,"Launch Time":1430917386915,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387016,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":84,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":101664,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60489,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":12,"Attempt":0,"Launch Time":1430917386923,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387016,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":77,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102476,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":22,"Attempt":0,"Launch Time":1430917387018,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387119,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":14,"Attempt":0,"Launch Time":1430917386925,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387019,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize 
Time":6,"Executor Run Time":83,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95646,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":23,"Attempt":0,"Launch Time":1430917387031,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":13,"Attempt":0,"Launch Time":1430917386924,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387062,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":76,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95004,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":24,"Attempt":0,"Launch Time":1430917387098,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":16,"Attempt":0,"Launch Time":1430917387001,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387099,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":84,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":108320,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":25,"Attempt":0,"Launch Time":1430917387103,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":20,"Attempt":0,"Launch Time":1430917387014,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387104,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":83,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97716,"Shuffle 
Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":26,"Attempt":0,"Launch Time":1430917387105,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":19,"Attempt":0,"Launch Time":1430917387012,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387106,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":84,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95788,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":27,"Attempt":0,"Launch Time":1430917387110,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":21,"Attempt":0,"Launch Time":1430917387015,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387111,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":88,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100270,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":28,"Attempt":0,"Launch Time":1430917387113,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":29,"Attempt":0,"Launch Time":1430917387114,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":23,"Attempt":0,"Launch Time":1430917387031,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":65,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write 
Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":91844,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":18,"Attempt":0,"Launch Time":1430917387010,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":92,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100836,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":30,"Attempt":0,"Launch Time":1430917387118,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":22,"Attempt":0,"Launch Time":1430917387018,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387119,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":93,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":143427,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":31,"Attempt":0,"Launch Time":1430917387127,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":17,"Attempt":0,"Launch Time":1430917387005,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387128,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":91,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":99944,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":32,"Attempt":0,"Launch Time":1430917387148,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage 
Attempt ID":0,"Task Info":{"Task ID":33,"Index":33,"Attempt":0,"Launch Time":1430917387149,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":24,"Attempt":0,"Launch Time":1430917387098,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387150,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":43,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":157194,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":27,"Attempt":0,"Launch Time":1430917387110,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387151,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102019,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":34,"Index":34,"Attempt":0,"Launch Time":1430917387156,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":26,"Attempt":0,"Launch Time":1430917387105,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387157,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":108213,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":35,"Index":35,"Attempt":0,"Launch Time":1430917387161,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":28,"Attempt":0,"Launch Time":1430917387113,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387162,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":29,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":104299,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":36,"Index":36,"Attempt":0,"Launch Time":1430917387164,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":25,"Attempt":0,"Launch Time":1430917387103,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387164,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":49,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94134,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":37,"Index":37,"Attempt":0,"Launch Time":1430917387165,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":38,"Index":38,"Attempt":0,"Launch Time":1430917387166,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":29,"Attempt":0,"Launch Time":1430917387114,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387166,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":39,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":114938,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":39,"Index":39,"Attempt":0,"Launch Time":1430917387180,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":30,"Index":30,"Attempt":0,"Launch Time":1430917387118,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387180,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":34,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":119770,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":32,"Index":32,"Attempt":0,"Launch Time":1430917387148,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387181,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89603,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":40,"Index":40,"Attempt":0,"Launch Time":1430917387197,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":34,"Index":34,"Attempt":0,"Launch Time":1430917387156,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387198,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":27,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":127746,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":41,"Index":41,"Attempt":0,"Launch Time":1430917387200,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":31,"Attempt":0,"Launch Time":1430917387127,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387201,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":36,"Executor Run Time":24,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92619,"Shuffle Records 
Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":42,"Index":42,"Attempt":0,"Launch Time":1430917387203,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":36,"Index":36,"Attempt":0,"Launch Time":1430917387164,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387204,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":29,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":123855,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":43,"Index":43,"Attempt":0,"Launch Time":1430917387204,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":44,"Index":44,"Attempt":0,"Launch Time":1430917387205,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":45,"Index":45,"Attempt":0,"Launch Time":1430917387206,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":37,"Index":37,"Attempt":0,"Launch Time":1430917387165,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387207,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":111869,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":33,"Index":33,"Attempt":0,"Launch Time":1430917387149,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387207,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":43,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle 
Bytes Written":1710,"Shuffle Write Time":118329,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":46,"Index":46,"Attempt":0,"Launch Time":1430917387210,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":35,"Index":35,"Attempt":0,"Launch Time":1430917387161,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387211,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":35,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":160963,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":47,"Index":47,"Attempt":0,"Launch Time":1430917387212,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":39,"Index":39,"Attempt":0,"Launch Time":1430917387180,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387212,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98748,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":38,"Index":38,"Attempt":0,"Launch Time":1430917387166,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387213,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":131158,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":48,"Index":48,"Attempt":0,"Launch Time":1430917387220,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task 
Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":40,"Index":40,"Attempt":0,"Launch Time":1430917387197,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387221,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":14,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94792,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":49,"Index":49,"Attempt":0,"Launch Time":1430917387223,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":41,"Index":41,"Attempt":0,"Launch Time":1430917387200,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387224,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90765,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":50,"Index":50,"Attempt":0,"Launch Time":1430917387240,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":44,"Index":44,"Attempt":0,"Launch Time":1430917387205,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387242,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98293,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":51,"Index":51,"Attempt":0,"Launch Time":1430917387242,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":52,"Index":52,"Attempt":0,"Launch Time":1430917387243,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":43,"Index":43,"Attempt":0,"Launch Time":1430917387204,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387243,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":171516,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":45,"Index":45,"Attempt":0,"Launch Time":1430917387206,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387243,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92985,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":53,"Index":53,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":54,"Index":54,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":42,"Index":42,"Attempt":0,"Launch Time":1430917387203,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387245,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103713,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":47,"Index":47,"Attempt":0,"Launch Time":1430917387212,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387245,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory 
Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103015,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":55,"Index":55,"Attempt":0,"Launch Time":1430917387246,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":56,"Index":56,"Attempt":0,"Launch Time":1430917387249,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":48,"Index":48,"Attempt":0,"Launch Time":1430917387220,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387250,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":24,"Result Size":2065,"JVM GC Time":6,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":139844,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":46,"Index":46,"Attempt":0,"Launch Time":1430917387210,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387253,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":6,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":113322,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":57,"Index":57,"Attempt":0,"Launch Time":1430917387257,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":49,"Index":49,"Attempt":0,"Launch Time":1430917387223,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387257,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":7,"Executor Run Time":23,"Result Size":2065,"JVM GC Time":6,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94984,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} 
+{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":58,"Index":58,"Attempt":0,"Launch Time":1430917387263,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":51,"Index":51,"Attempt":0,"Launch Time":1430917387242,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387263,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":96013,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":59,"Index":59,"Attempt":0,"Launch Time":1430917387265,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":50,"Index":50,"Attempt":0,"Launch Time":1430917387240,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387266,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90836,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":60,"Index":60,"Attempt":0,"Launch Time":1430917387269,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":61,"Index":61,"Attempt":0,"Launch Time":1430917387270,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":52,"Index":52,"Attempt":0,"Launch Time":1430917387243,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387271,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89664,"Shuffle Records Written":10},"Input Metrics":{"Data 
Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":62,"Index":62,"Attempt":0,"Launch Time":1430917387272,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":53,"Index":53,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387273,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92835,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":63,"Index":63,"Attempt":0,"Launch Time":1430917387276,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":55,"Index":55,"Attempt":0,"Launch Time":1430917387246,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387276,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":21,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":108309,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":64,"Index":64,"Attempt":0,"Launch Time":1430917387277,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":57,"Index":57,"Attempt":0,"Launch Time":1430917387257,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387278,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":96849,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":65,"Index":65,"Attempt":0,"Launch Time":1430917387279,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":56,"Index":56,"Attempt":0,"Launch Time":1430917387249,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387280,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":20,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90329,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":66,"Index":66,"Attempt":0,"Launch Time":1430917387285,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":58,"Index":58,"Attempt":0,"Launch Time":1430917387263,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387286,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97521,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":67,"Index":67,"Attempt":0,"Launch Time":1430917387288,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":59,"Index":59,"Attempt":0,"Launch Time":1430917387265,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387288,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100753,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":54,"Index":54,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387303,"Failed":false,"Accumulables":[]},"Task 
Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90506,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":68,"Index":68,"Attempt":0,"Launch Time":1430917387306,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":60,"Index":60,"Attempt":0,"Launch Time":1430917387269,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387307,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":8,"Executor Run Time":28,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":156916,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":69,"Index":69,"Attempt":0,"Launch Time":1430917387308,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":62,"Index":62,"Attempt":0,"Launch Time":1430917387272,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387309,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100396,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":70,"Index":70,"Attempt":0,"Launch Time":1430917387311,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":64,"Index":64,"Attempt":0,"Launch Time":1430917387277,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387311,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":25,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes 
Written":1710,"Shuffle Write Time":104087,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":71,"Index":71,"Attempt":0,"Launch Time":1430917387313,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":72,"Index":72,"Attempt":0,"Launch Time":1430917387314,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":61,"Index":61,"Attempt":0,"Launch Time":1430917387270,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387315,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":36,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100426,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":73,"Index":73,"Attempt":0,"Launch Time":1430917387316,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":63,"Index":63,"Attempt":0,"Launch Time":1430917387276,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387316,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":20,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102779,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":74,"Index":74,"Attempt":0,"Launch Time":1430917387318,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":66,"Index":66,"Attempt":0,"Launch Time":1430917387285,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387319,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":27,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes 
Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100823,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":65,"Index":65,"Attempt":0,"Launch Time":1430917387279,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387321,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":27,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":99123,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":75,"Index":75,"Attempt":0,"Launch Time":1430917387323,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":67,"Index":67,"Attempt":0,"Launch Time":1430917387288,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387323,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92741,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":76,"Index":76,"Attempt":0,"Launch Time":1430917387327,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":68,"Index":68,"Attempt":0,"Launch Time":1430917387306,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387328,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":101750,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":77,"Index":77,"Attempt":0,"Launch Time":1430917387329,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":69,"Index":69,"Attempt":0,"Launch Time":1430917387308,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387331,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89437,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":78,"Index":78,"Attempt":0,"Launch Time":1430917387338,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":71,"Index":71,"Attempt":0,"Launch Time":1430917387313,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387339,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":183323,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":79,"Index":79,"Attempt":0,"Launch Time":1430917387339,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":70,"Index":70,"Attempt":0,"Launch Time":1430917387311,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":24,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":105933,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":80,"Index":80,"Attempt":0,"Launch Time":1430917387341,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":74,"Index":74,"Attempt":0,"Launch Time":1430917387318,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387341,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98047,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":81,"Index":81,"Attempt":0,"Launch Time":1430917387342,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":72,"Index":72,"Attempt":0,"Launch Time":1430917387314,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387343,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":139584,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":82,"Index":82,"Attempt":0,"Launch Time":1430917387349,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":75,"Index":75,"Attempt":0,"Launch Time":1430917387323,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387359,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":20,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":134733,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":83,"Index":83,"Attempt":0,"Launch Time":1430917387360,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":73,"Index":73,"Attempt":0,"Launch Time":1430917387316,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387360,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":16,"Executor 
Run Time":25,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":197728,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":84,"Index":84,"Attempt":0,"Launch Time":1430917387368,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":77,"Index":77,"Attempt":0,"Launch Time":1430917387329,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387368,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":30,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":104879,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":85,"Index":85,"Attempt":0,"Launch Time":1430917387371,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":76,"Index":76,"Attempt":0,"Launch Time":1430917387327,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387371,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":117198,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":86,"Index":86,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":87,"Index":87,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":79,"Index":79,"Attempt":0,"Launch Time":1430917387339,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387375,"Failed":false,"Accumulables":[]},"Task 
Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":28,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":760569,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":80,"Index":80,"Attempt":0,"Launch Time":1430917387341,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387375,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98069,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":88,"Index":88,"Attempt":0,"Launch Time":1430917387376,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":78,"Index":78,"Attempt":0,"Launch Time":1430917387338,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387377,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94029,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":89,"Index":89,"Attempt":0,"Launch Time":1430917387384,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":82,"Index":82,"Attempt":0,"Launch Time":1430917387349,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387385,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":23,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103837,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":90,"Index":90,"Attempt":0,"Launch Time":1430917387385,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":81,"Index":81,"Attempt":0,"Launch Time":1430917387342,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387386,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102935,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":91,"Index":91,"Attempt":0,"Launch Time":1430917387394,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":84,"Index":84,"Attempt":0,"Launch Time":1430917387368,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387395,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97176,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":92,"Index":92,"Attempt":0,"Launch Time":1430917387400,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":85,"Index":85,"Attempt":0,"Launch Time":1430917387371,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387401,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103743,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":93,"Index":93,"Attempt":0,"Launch Time":1430917387402,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task 
Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":86,"Index":86,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387402,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95848,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":94,"Index":94,"Attempt":0,"Launch Time":1430917387404,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":83,"Index":83,"Attempt":0,"Launch Time":1430917387360,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387405,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":37,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":134049,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":95,"Index":95,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":96,"Index":96,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":97,"Index":97,"Attempt":0,"Launch Time":1430917387407,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":88,"Index":88,"Attempt":0,"Launch Time":1430917387376,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387407,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94269,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":90,"Index":90,"Attempt":0,"Launch Time":1430917387385,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387408,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98472,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":98,"Index":98,"Attempt":0,"Launch Time":1430917387410,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":87,"Index":87,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387410,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102159,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":89,"Index":89,"Attempt":0,"Launch Time":1430917387384,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387417,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97226,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":99,"Index":99,"Attempt":0,"Launch Time":1430917387426,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":91,"Index":91,"Attempt":0,"Launch Time":1430917387394,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387427,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":8,"Executor Run Time":21,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":1,"Memory 
Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98301,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":94,"Index":94,"Attempt":0,"Launch Time":1430917387404,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387441,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102455,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":92,"Index":92,"Attempt":0,"Launch Time":1430917387400,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387442,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":114766,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":96,"Index":96,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387443,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":26,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100678,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":93,"Index":93,"Attempt":0,"Launch Time":1430917387402,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387443,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95818,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task 
Info":{"Task ID":95,"Index":95,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387444,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":28,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":117803,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":98,"Index":98,"Attempt":0,"Launch Time":1430917387410,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387444,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":7,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":105039,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":99,"Index":99,"Attempt":0,"Launch Time":1430917387426,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387448,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":133964,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70565,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":97,"Index":97,"Attempt":0,"Launch Time":1430917387407,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387450,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":131987,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"map at :26","Number of Tasks":100,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use 
Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":1,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:287)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Submission Time":1430917386485,"Completion Time":1430917387451,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"collect at :26","Number of Tasks":10,"RDD Info":[{"RDD ID":4,"Name":"4","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:819)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]},"Properties":{}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":100,"Index":0,"Attempt":0,"Launch Time":1430917387462,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":101,"Index":1,"Attempt":0,"Launch Time":1430917387464,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":102,"Index":2,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":103,"Index":3,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":104,"Index":4,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":105,"Index":5,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":106,"Index":6,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":107,"Index":7,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":108,"Index":8,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":103,"Index":3,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387546,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":75,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":109,"Index":9,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":105,"Index":5,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387547,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":76,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":100,"Index":0,"Attempt":0,"Launch Time":1430917387462,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387548,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":74,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":107,"Index":7,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387548,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":76,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":101,"Index":1,"Attempt":0,"Launch Time":1430917387464,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387548,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":76,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":1,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":106,"Index":6,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387549,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":75,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks 
Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":104,"Index":4,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387549,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":74,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":1,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":102,"Index":2,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387550,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":79,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":108,"Index":8,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387583,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":31,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":109,"Index":9,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387585,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":30,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":1,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"collect at :26","Number of Tasks":10,"RDD Info":[{"RDD ID":4,"Name":"4","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:819)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Submission Time":1430917387462,"Completion Time":1430917387585,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Completion Time":1430917387589,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1430917391398} diff --git a/core/src/test/scala/org/apache/spark/JsonTestUtils.scala b/core/src/test/scala/org/apache/spark/JsonTestUtils.scala new file mode 100644 index 0000000000000..ba367cd476146 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/JsonTestUtils.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark + +import org.json4s._ +import org.json4s.jackson.JsonMethods + +trait JsonTestUtils { + def assertValidDataInJson(validateJson: JValue, expectedJson: JValue) { + val Diff(c, a, d) = validateJson.diff(expectedJson) + val validatePretty = JsonMethods.pretty(validateJson) + val expectedPretty = JsonMethods.pretty(expectedJson) + val errorMessage = s"Expected:\n$expectedPretty\nFound:\n$validatePretty" + import org.scalactic.TripleEquals._ + assert(c === JNothing, s"$errorMessage\nChanged:\n${JsonMethods.pretty(c)}") + assert(a === JNothing, s"$errorMessage\nAdded:\n${JsonMethods.pretty(a)}") + assert(d === JNothing, s"$errorMessage\nDeleted:\n${JsonMethods.pretty(d)}") + } + +} diff --git a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala index b58d62567afe1..e04a79284175c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala @@ -28,9 +28,9 @@ import org.scalatest.FunSuite import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master.{ApplicationInfo, DriverInfo, RecoveryState, WorkerInfo} import org.apache.spark.deploy.worker.{DriverRunner, ExecutorRunner} -import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.{JsonTestUtils, SecurityManager, SparkConf} -class JsonProtocolSuite extends FunSuite { +class JsonProtocolSuite extends FunSuite with JsonTestUtils { test("writeApplicationInfo") { val output = JsonProtocol.writeApplicationInfo(createAppInfo()) @@ -136,16 +136,6 @@ class JsonProtocolSuite extends FunSuite { case e: JsonParseException => fail("Invalid Json detected", e) } } - - def assertValidDataInJson(validateJson: JValue, expectedJson: JValue) { - val Diff(c, a, d) = validateJson diff expectedJson - val validatePretty = JsonMethods.pretty(validateJson) - val expectedPretty = JsonMethods.pretty(expectedJson) - val errorMessage = s"Expected:\n$expectedPretty\nFound:\n$validatePretty" - assert(c === JNothing, s"$errorMessage\nChanged:\n${JsonMethods.pretty(c)}") - assert(a === JNothing, s"$errorMessage\nAdded:\n${JsonMethods.pretty(a)}") - assert(d === JNothing, s"$errorMessage\nDelected:\n${JsonMethods.pretty(d)}") - } } object JsonConstants { diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 71ba9c18257b8..318ab5dbc4804 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -14,22 +14,164 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.spark.deploy.history -import javax.servlet.http.HttpServletRequest - -import scala.collection.mutable +import java.io.{File, FileInputStream, FileWriter, IOException} +import java.net.{HttpURLConnection, URL} +import javax.servlet.http.{HttpServletRequest, HttpServletResponse} -import org.apache.hadoop.fs.Path -import org.mockito.Mockito.{when} -import org.scalatest.FunSuite -import org.scalatest.Matchers +import org.apache.commons.io.{FileUtils, IOUtils} +import org.mockito.Mockito.when +import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} import org.scalatest.mock.MockitoSugar +import org.apache.spark.{JsonTestUtils, SecurityManager, SparkConf} import org.apache.spark.ui.SparkUI -class HistoryServerSuite extends FunSuite with Matchers with MockitoSugar { +/** + * A collection of tests against the history server, including comparing responses from the JSON + * metrics API to a set of known "golden files". If new endpoints / parameters are added, + * cases should be added to this test suite. The expected outcomes can be generated by running + * the HistoryServerSuite.main. Note that this will blindly generate new expectation files matching + * the current behavior -- the developer must verify that behavior is correct. + * + * Similarly, if the behavior is changed, HistoryServerSuite.main can be run to update the + * expectations. However, in general this should be done with extreme caution, as the metrics + * are considered part of Spark's public API. + */ +class HistoryServerSuite extends FunSuite with BeforeAndAfter with Matchers with MockitoSugar + with JsonTestUtils { + + private val logDir = new File("src/test/resources/spark-events") + private val expRoot = new File("src/test/resources/HistoryServerExpectations/") + + private var provider: FsHistoryProvider = null + private var server: HistoryServer = null + private var port: Int = -1 + + def init(): Unit = { + val conf = new SparkConf() + .set("spark.history.fs.logDirectory", logDir.getAbsolutePath) + .set("spark.history.fs.updateInterval", "0") + .set("spark.testing", "true") + provider = new FsHistoryProvider(conf) + provider.checkForLogs() + val securityManager = new SecurityManager(conf) + + server = new HistoryServer(conf, provider, securityManager, 18080) + server.initialize() + server.bind() + port = server.boundPort + } + + def stop(): Unit = { + server.stop() + } + + before { + init() + } + + after { + stop() + } + + val cases = Seq( + "application list json" -> "applications", + "completed app list json" -> "applications?status=completed", + "running app list json" -> "applications?status=running", + "minDate app list json" -> "applications?minDate=2015-02-10", + "maxDate app list json" -> "applications?maxDate=2015-02-10", + "maxDate2 app list json" -> "applications?maxDate=2015-02-03T10:42:40.000CST", + "one app json" -> "applications/local-1422981780767", + "one app multi-attempt json" -> "applications/local-1426533911241", + "job list json" -> "applications/local-1422981780767/jobs", + "job list from multi-attempt app json(1)" -> "applications/local-1426533911241/1/jobs", + "job list from multi-attempt app json(2)" -> "applications/local-1426533911241/2/jobs", + "one job json" -> "applications/local-1422981780767/jobs/0", + "succeeded job list json" -> "applications/local-1422981780767/jobs?status=succeeded", + "succeeded&failed job list json" -> + "applications/local-1422981780767/jobs?status=succeeded&status=failed", + "executor list json" -> "applications/local-1422981780767/executors", +
"stage list json" -> "applications/local-1422981780767/stages", + "complete stage list json" -> "applications/local-1422981780767/stages?status=complete", + "failed stage list json" -> "applications/local-1422981780767/stages?status=failed", + "one stage json" -> "applications/local-1422981780767/stages/1", + "one stage attempt json" -> "applications/local-1422981780767/stages/1/0", + + "stage task summary w shuffle write" + -> "applications/local-1430917381534/stages/0/0/taskSummary", + "stage task summary w shuffle read" + -> "applications/local-1430917381534/stages/1/0/taskSummary", + "stage task summary w/ custom quantiles" -> + "applications/local-1430917381534/stages/0/0/taskSummary?quantiles=0.01,0.5,0.99", + + "stage task list" -> "applications/local-1430917381534/stages/0/0/taskList", + "stage task list w/ offset & length" -> + "applications/local-1430917381534/stages/0/0/taskList?offset=10&length=50", + "stage task list w/ sortBy" -> + "applications/local-1430917381534/stages/0/0/taskList?sortBy=DECREASING_RUNTIME", + "stage task list w/ sortBy short names: -runtime" -> + "applications/local-1430917381534/stages/0/0/taskList?sortBy=-runtime", + "stage task list w/ sortBy short names: runtime" -> + "applications/local-1430917381534/stages/0/0/taskList?sortBy=runtime", + + "stage list with accumulable json" -> "applications/local-1426533911241/1/stages", + "stage with accumulable json" -> "applications/local-1426533911241/1/stages/0/0", + "stage task list from multi-attempt app json(1)" -> + "applications/local-1426533911241/1/stages/0/0/taskList", + "stage task list from multi-attempt app json(2)" -> + "applications/local-1426533911241/2/stages/0/0/taskList", + + "rdd list storage json" -> "applications/local-1422981780767/storage/rdd", + "one rdd storage json" -> "applications/local-1422981780767/storage/rdd/0" + ) + + // run a bunch of characterization tests -- just verify the behavior is the same as what is saved + // in the test resource folder + cases.foreach { case (name, path) => + test(name) { + val (code, jsonOpt, errOpt) = getContentAndCode(path) + code should be (HttpServletResponse.SC_OK) + jsonOpt should be ('defined) + errOpt should be (None) + val json = jsonOpt.get + val exp = IOUtils.toString(new FileInputStream( + new File(expRoot, HistoryServerSuite.sanitizePath(name) + "_expectation.json"))) + // compare the ASTs so formatting differences don't cause failures + import org.json4s._ + import org.json4s.jackson.JsonMethods._ + val jsonAst = parse(json) + val expAst = parse(exp) + assertValidDataInJson(jsonAst, expAst) + } + } + + test("response codes on bad paths") { + val badAppId = getContentAndCode("applications/foobar") + badAppId._1 should be (HttpServletResponse.SC_NOT_FOUND) + badAppId._3 should be (Some("unknown app: foobar")) + + val badStageId = getContentAndCode("applications/local-1422981780767/stages/12345") + badStageId._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStageId._3 should be (Some("unknown stage: 12345")) + + val badStageAttemptId = getContentAndCode("applications/local-1422981780767/stages/1/1") + badStageAttemptId._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStageAttemptId._3 should be (Some("unknown attempt for stage 1. 
Found attempts: [0]")) + + val badStageId2 = getContentAndCode("applications/local-1422981780767/stages/flimflam") + badStageId2._1 should be (HttpServletResponse.SC_NOT_FOUND) + // will take some mucking w/ jersey to get a better error msg in this case + + val badQuantiles = getContentAndCode( + "applications/local-1430917381534/stages/0/0/taskSummary?quantiles=foo,0.1") + badQuantiles._1 should be (HttpServletResponse.SC_BAD_REQUEST) + badQuantiles._3 should be (Some("Bad value for parameter \"quantiles\". Expected a double, " + + "got \"foo\"")) + + getContentAndCode("foobar")._1 should be (HttpServletResponse.SC_NOT_FOUND) + } test("generate history page with relative links") { val historyServer = mock[HistoryServer] @@ -54,4 +196,75 @@ class HistoryServerSuite extends FunSuite with Matchers with MockitoSugar { } yield (attrs.toString) justHrefs should contain(link) } + + def getContentAndCode(path: String, port: Int = port): (Int, Option[String], Option[String]) = { + HistoryServerSuite.getContentAndCode(new URL(s"http://localhost:$port/json/v1/$path")) + } + + def getUrl(path: String): String = { + HistoryServerSuite.getUrl(new URL(s"http://localhost:$port/json/v1/$path")) + } + + def generateExpectation(name: String, path: String): Unit = { + val json = getUrl(path) + val file = new File(expRoot, HistoryServerSuite.sanitizePath(name) + "_expectation.json") + val out = new FileWriter(file) + out.write(json) + out.close() + } +} + +object HistoryServerSuite { + def main(args: Array[String]): Unit = { + // generate the "expected" results for the characterization tests. Just blindly assume the + // current behavior is correct, and write out the returned json to the test/resource files + + val suite = new HistoryServerSuite + FileUtils.deleteDirectory(suite.expRoot) + suite.expRoot.mkdirs() + try { + suite.init() + suite.cases.foreach { case (name, path) => + suite.generateExpectation(name, path) + } + } finally { + suite.stop() + } + } + + def getContentAndCode(url: URL): (Int, Option[String], Option[String]) = { + val connection = url.openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod("GET") + connection.connect() + val code = connection.getResponseCode() + val inString = try { + val in = Option(connection.getInputStream()) + in.map(IOUtils.toString) + } catch { + case io: IOException => None + } + val errString = try { + val err = Option(connection.getErrorStream()) + err.map(IOUtils.toString) + } catch { + case io: IOException => None + } + (code, inString, errString) + } + + + def sanitizePath(path: String): String = { + // this doesn't need to be perfect, just good enough to avoid collisions + path.replaceAll("\\W", "_") + } + + def getUrl(path: URL): String = { + val (code, resultOpt, error) = getContentAndCode(path) + if (code == 200) { + resultOpt.get + } else { + throw new RuntimeException( + "got code: " + code + " when getting " + path + " w/ error: " + error) + } + } } diff --git a/core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamTest.scala b/core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamTest.scala new file mode 100644 index 0000000000000..5274df904d395 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamTest.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import org.scalatest.{Matchers, FunSuite} + +class SimpleDateParamTest extends FunSuite with Matchers { + + test("date parsing") { + new SimpleDateParam("2015-02-20T23:21:17.190GMT").timestamp should be (1424474477190L) + new SimpleDateParam("2015-02-20T17:21:17.190CST").timestamp should be (1424474477190L) + new SimpleDateParam("2015-02-20").timestamp should be (1424390400000L) // GMT + } + +} diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index d53d7f3ba5ae7..117b2c3960820 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -18,11 +18,13 @@ package org.apache.spark.ui import java.net.{HttpURLConnection, URL} -import javax.servlet.http.HttpServletRequest +import javax.servlet.http.{HttpServletResponse, HttpServletRequest} import scala.collection.JavaConversions._ import scala.xml.Node +import org.json4s._ +import org.json4s.jackson.JsonMethods import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.openqa.selenium.{By, WebDriver} import org.scalatest._ @@ -33,8 +35,9 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.LocalSparkContext._ import org.apache.spark._ import org.apache.spark.api.java.StorageLevels +import org.apache.spark.deploy.history.HistoryServerSuite import org.apache.spark.shuffle.FetchFailedException - +import org.apache.spark.status.api.v1.{JacksonMessageWriter, StageStatus} /** * Selenium tests for the Spark Web UI. 
@@ -42,6 +45,8 @@ import org.apache.spark.shuffle.FetchFailedException class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ + implicit val formats = DefaultFormats + override def beforeAll(): Unit = { webDriver = new HtmlUnitDriver @@ -76,28 +81,42 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before val rdd = sc.parallelize(Seq(1, 2, 3)) rdd.persist(StorageLevels.DISK_ONLY).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage") + goToUi(ui, "/storage") val tableRowText = findAll(cssSelector("#storage-by-rdd-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.DISK_ONLY.description) } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage/rdd/?id=0") + goToUi(ui, "/storage/rdd/?id=0") val tableRowText = findAll(cssSelector("#rdd-storage-by-block-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.DISK_ONLY.description) } + val storageJson = getJson(ui, "storage/rdd") + storageJson.children.length should be (1) + (storageJson \ "storageLevel").extract[String] should be (StorageLevels.DISK_ONLY.description) + val rddJson = getJson(ui, "storage/rdd/0") + (rddJson \ "storageLevel").extract[String] should be (StorageLevels.DISK_ONLY.description) + rdd.unpersist() rdd.persist(StorageLevels.MEMORY_ONLY).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage") + goToUi(ui, "/storage") val tableRowText = findAll(cssSelector("#storage-by-rdd-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.MEMORY_ONLY.description) } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage/rdd/?id=0") + goToUi(ui, "/storage/rdd/?id=0") val tableRowText = findAll(cssSelector("#rdd-storage-by-block-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.MEMORY_ONLY.description) } + + val updatedStorageJson = getJson(ui, "storage/rdd") + updatedStorageJson.children.length should be (1) + (updatedStorageJson \ "storageLevel").extract[String] should be ( + StorageLevels.MEMORY_ONLY.description) + val updatedRddJson = getJson(ui, "storage/rdd/0") + (updatedRddJson \ "storageLevel").extract[String] should be ( + StorageLevels.MEMORY_ONLY.description) } } @@ -108,10 +127,13 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before sc.parallelize(1 to 10).map { x => throw new Exception()}.collect() } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") find(id("active")) should be(None) // Since we hide empty tables find(id("failed")).get.text should be("Failed Stages (1)") } + val stageJson = getJson(sc.ui.get, "stages") + stageJson.children.length should be (1) + (stageJson \ "status").extract[String] should be (StageStatus.FAILED.name()) // Regression test for SPARK-2105 class NotSerializable @@ -120,12 +142,15 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before sc.parallelize(1 to 10).map { x => unserializableObject}.collect() } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") find(id("active")) should be(None) // Since we hide empty tables // 
The failure occurs before the stage becomes active, hence we should still show only one // failed stage, not two: find(id("failed")).get.text should be("Failed Stages (1)") } + + val updatedStageJson = getJson(sc.ui.get, "stages") + updatedStageJson should be (stageJson) } } @@ -138,7 +163,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before withSpark(newSparkContext(killEnabled = true)) { sc => runSlowJob(sc) eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") assert(hasKillLink) } } @@ -146,7 +171,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before withSpark(newSparkContext(killEnabled = false)) { sc => runSlowJob(sc) eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") assert(!hasKillLink) } } @@ -157,7 +182,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before // If no job has been run in a job group, then "(Job Group)" should not appear in the header sc.parallelize(Seq(1, 2, 3)).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") val tableHeaders = findAll(cssSelector("th")).map(_.text).toSeq tableHeaders should not contain "Job Id (Job Group)" } @@ -165,10 +190,22 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before sc.setJobGroup("my-job-group", "my-job-group-description") sc.parallelize(Seq(1, 2, 3)).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") val tableHeaders = findAll(cssSelector("th")).map(_.text).toSeq tableHeaders should contain ("Job Id (Job Group)") } + + val jobJson = getJson(sc.ui.get, "jobs") + for { + job @ JObject(_) <- jobJson + JInt(jobId) <- job \ "jobId" + jobGroup = job \ "jobGroup" + } { + jobId.toInt match { + case 0 => jobGroup should be (JNothing) + case 1 => jobGroup should be (JString("my-job-group")) + } + } } } @@ -195,7 +232,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } mappedData.count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") find(cssSelector(".stage-progress-cell")).get.text should be ("2/2 (1 failed)") // Ideally, the following test would pass, but currently we overcount completed tasks // if task recomputations occur: @@ -204,6 +241,32 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before // of completed tasks may be higher: find(cssSelector(".progress-cell .progress")).get.text should be ("3/2 (1 failed)") } + val jobJson = getJson(sc.ui.get, "jobs") + (jobJson \ "numTasks").extract[Int]should be (2) + (jobJson \ "numCompletedTasks").extract[Int] should be (3) + (jobJson \ "numFailedTasks").extract[Int] should be (1) + (jobJson \ "numCompletedStages").extract[Int] should be (2) + (jobJson \ "numFailedStages").extract[Int] should be (1) + val stageJson = getJson(sc.ui.get, "stages") + + for { + stage @ JObject(_) <- stageJson + JString(status) <- stage \ "status" + JInt(stageId) <- stage \ "stageId" + JInt(attemptId) <- stage \ "attemptId" + } { + val exp = if (attemptId == 0 && stageId == 1) StageStatus.FAILED else StageStatus.COMPLETE + status should be (exp.name()) + 
} + + for { + stageId <- 0 to 1 + attemptId <- 0 to 1 + } { + val exp = if (attemptId == 0 && stageId == 1) StageStatus.FAILED else StageStatus.COMPLETE + val stageJson = getJson(sc.ui.get, s"stages/$stageId/$attemptId") + (stageJson \ "status").extract[String] should be (exp.name()) + } } } @@ -218,7 +281,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before // Start the job: rdd.countAsync() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs/job/?id=0") + goToUi(sc, "/jobs/job/?id=0") find(id("active")).get.text should be ("Active Stages (1)") find(id("pending")).get.text should be ("Pending Stages (2)") // Essentially, we want to check that none of the stage rows show @@ -244,7 +307,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before rdd.count() rdd.count() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") // The completed jobs table should have two rows. The first row will be the most recent job: val firstRow = find(cssSelector("tbody tr")).get.underlying val firstRowColumns = firstRow.findElements(By.tagName("td")) @@ -271,7 +334,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before rdd.count() rdd.count() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs/job/?id=1") + goToUi(sc, "/jobs/job/?id=1") find(id("pending")) should be (None) find(id("active")) should be (None) find(id("failed")) should be (None) @@ -299,7 +362,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before rdd.count() rdd.count() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") findAll(cssSelector("tbody tr a")).foreach { link => link.text.toLowerCase should include ("count") link.text.toLowerCase should not include "unknown" @@ -321,7 +384,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } sparkUI.attachTab(newTab) eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/")) + goToUi(sc, "") find(cssSelector("""ul li a[href*="jobs"]""")) should not be(None) find(cssSelector("""ul li a[href*="stages"]""")) should not be(None) find(cssSelector("""ul li a[href*="storage"]""")) should not be(None) @@ -330,12 +393,12 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } eventually(timeout(10 seconds), interval(50 milliseconds)) { // check whether new page exists - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/foo") + goToUi(sc, "/foo") find(cssSelector("b")).get.text should include ("html magic") } sparkUI.detachTab(newTab) eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/")) + goToUi(sc, "") find(cssSelector("""ul li a[href*="jobs"]""")) should not be(None) find(cssSelector("""ul li a[href*="stages"]""")) should not be(None) find(cssSelector("""ul li a[href*="storage"]""")) should not be(None) @@ -344,7 +407,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } eventually(timeout(10 seconds), interval(50 milliseconds)) { // check new page not exist - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/foo") + goToUi(sc, "/foo") find(cssSelector("b")) should 
be(None) } } @@ -371,4 +434,163 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } } } + + test("stage & job retention") { + val conf = new SparkConf() + .setMaster("local") + .setAppName("test") + .set("spark.ui.enabled", "true") + .set("spark.ui.port", "0") + .set("spark.ui.retainedStages", "3") + .set("spark.ui.retainedJobs", "2") + val sc = new SparkContext(conf) + assert(sc.ui.isDefined) + + withSpark(sc) { sc => + // run a few jobs & stages ... + (0 until 5).foreach { idx => + // NOTE: if we reverse the order, things don't really behave nicely + // we lose the stage for a job we keep, and then the job doesn't know + // about its last stage + sc.parallelize(idx to (idx + 3)).map(identity).groupBy(identity).map(identity) + .groupBy(identity).count() + sc.parallelize(idx to (idx + 3)).collect() + } + + val expJobInfo = Seq( + ("9", "collect"), + ("8", "count") + ) + + eventually(timeout(1 second), interval(50 milliseconds)) { + goToUi(sc, "/jobs") + // The completed jobs table should have two rows. The first row will be the most recent job: + find("completed-summary").get.text should be ("Completed Jobs: 10, only showing 2") + find("completed").get.text should be ("Completed Jobs (10, only showing 2)") + val rows = findAll(cssSelector("tbody tr")).toIndexedSeq.map{_.underlying} + rows.size should be (expJobInfo.size) + for { + (row, idx) <- rows.zipWithIndex + columns = row.findElements(By.tagName("td")) + id = columns(0).getText() + description = columns(1).getText() + } { + id should be (expJobInfo(idx)._1) + description should include (expJobInfo(idx)._2) + } + } + + val jobsJson = getJson(sc.ui.get, "jobs") + jobsJson.children.size should be (expJobInfo.size) + for { + (job @ JObject(_),idx) <- jobsJson.children.zipWithIndex + id = (job \ "jobId").extract[String] + name = (job \ "name").extract[String] + } { + withClue(s"idx = $idx; id = $id; name = ${name.substring(0,20)}") { + id should be (expJobInfo(idx)._1) + name should include (expJobInfo(idx)._2) + } + } + + // what about when we query for a job that did exist, but has been cleared? 
+ goToUi(sc, "/jobs/job/?id=7") + find("no-info").get.text should be ("No information to display for job 7") + + val badJob = HistoryServerSuite.getContentAndCode(jsonUrl(sc.ui.get, "jobs/7")) + badJob._1 should be (HttpServletResponse.SC_NOT_FOUND) + badJob._2 should be (None) + badJob._3 should be (Some("unknown job: 7")) + + val expStageInfo = Seq( + ("19", "collect"), + ("18", "count"), + ("17", "groupBy") + ) + + eventually(timeout(1 second), interval(50 milliseconds)) { + goToUi(sc, "/stages") + find("completed-summary").get.text should be ("Completed Stages: 20, only showing 3") + find("completed").get.text should be ("Completed Stages (20, only showing 3)") + val rows = findAll(cssSelector("tbody tr")).toIndexedSeq.map{_.underlying} + rows.size should be (3) + for { + (row, idx) <- rows.zipWithIndex + columns = row.findElements(By.tagName("td")) + id = columns(0).getText() + description = columns(1).getText() + } { + id should be (expStageInfo(idx)._1) + description should include (expStageInfo(idx)._2) + } + } + + val stagesJson = getJson(sc.ui.get, "stages") + stagesJson.children.size should be (3) + for { + (stage @ JObject(_), idx) <- stagesJson.children.zipWithIndex + id = (stage \ "stageId").extract[String] + name = (stage \ "name").extract[String] + } { + id should be (expStageInfo(idx)._1) + name should include (expStageInfo(idx)._2) + } + + // nonexistent stage + + goToUi(sc, "/stages/stage/?id=12&attempt=0") + find("no-info").get.text should be ("No information to display for Stage 12 (Attempt 0)") + val badStage = HistoryServerSuite.getContentAndCode(jsonUrl(sc.ui.get,"stages/12/0")) + badStage._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStage._2 should be (None) + badStage._3 should be (Some("unknown stage: 12")) + + val badAttempt = HistoryServerSuite.getContentAndCode(jsonUrl(sc.ui.get,"stages/19/15")) + badAttempt._1 should be (HttpServletResponse.SC_NOT_FOUND) + badAttempt._2 should be (None) + badAttempt._3 should be (Some("unknown attempt for stage 19. 
Found attempts: [0]"))
+
+      val badStageAttemptList = HistoryServerSuite.getContentAndCode(
+        jsonUrl(sc.ui.get, "stages/12"))
+      badStageAttemptList._1 should be (HttpServletResponse.SC_NOT_FOUND)
+      badStageAttemptList._2 should be (None)
+      badStageAttemptList._3 should be (Some("unknown stage: 12"))
+    }
+  }
+
+  test("live UI json application list") {
+    withSpark(newSparkContext()) { sc =>
+      val appListRawJson = HistoryServerSuite.getUrl(new URL(
+        sc.ui.get.appUIAddress + "/json/v1/applications"))
+      val appListJsonAst = JsonMethods.parse(appListRawJson)
+      appListJsonAst.children.length should be (1)
+      val attempts = (appListJsonAst \ "attempts").children
+      attempts.size should be (1)
+      (attempts(0) \ "completed").extract[Boolean] should be (false)
+      parseDate(attempts(0) \ "startTime") should be (sc.startTime)
+      parseDate(attempts(0) \ "endTime") should be (-1)
+      val oneAppJsonAst = getJson(sc.ui.get, "")
+      oneAppJsonAst should be (appListJsonAst.children(0))
+    }
+  }
+
+  def goToUi(sc: SparkContext, path: String): Unit = {
+    goToUi(sc.ui.get, path)
+  }
+
+  def goToUi(ui: SparkUI, path: String): Unit = {
+    go to (ui.appUIAddress.stripSuffix("/") + path)
+  }
+
+  def parseDate(json: JValue): Long = {
+    JacksonMessageWriter.makeISODateFormat.parse(json.extract[String]).getTime
+  }
+
+  def getJson(ui: SparkUI, path: String): JValue = {
+    JsonMethods.parse(HistoryServerSuite.getUrl(jsonUrl(ui, path)))
+  }
+
+  def jsonUrl(ui: SparkUI, path: String): URL = {
+    new URL(ui.appUIAddress + "/json/v1/applications/test/" + path)
+  }
 }
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 8a85928d6d44d..1e0fc150862fb 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -174,6 +174,80 @@ making it easy to identify slow tasks, data skew, etc.
 Note that the history server only displays completed Spark jobs. One way to signal the completion of a Spark job is to stop the Spark Context explicitly (`sc.stop()`), or in Python using the `with SparkContext() as sc:` to handle the Spark Context setup and tear down, and still show the job history on the UI.
 
+## REST API
+
+In addition to viewing the metrics in the UI, they are also available as JSON. This gives developers
+an easy way to create new visualizations and monitoring tools for Spark. The JSON is available for
+both running applications and the history server. The endpoints are mounted at `/json/v1`. E.g.,
+for the history server, they would typically be accessible at `http://<server-url>:18080/json/v1`, and
+for a running application, at `http://localhost:4040/json/v1`.
+
-        <td>{status.blockManagerId.host + ":" + status.blockManagerId.port}</td>
+        <td>{worker.address}</td>
         <td>
-          {Utils.bytesToString(status.memUsedByRdd(rddId))}
-          ({Utils.bytesToString(status.memRemaining)} Remaining)
+          {Utils.bytesToString(worker.memoryUsed)}
+          ({Utils.bytesToString(worker.memoryRemaining)} Remaining)
         </td>
-        <td>{Utils.bytesToString(status.diskUsedByRdd(rddId))}</td>
+        <td>{Utils.bytesToString(worker.diskUsed)}</td>
 
-        <td>{id}</td>
+        <td>{row.blockName}</td>
         <td>
-          {block.storageLevel.description}
+          {row.storageLevel}
         </td>
         <td>
-          {Utils.bytesToString(block.memSize)}
+          {Utils.bytesToString(row.memoryUsed)}
         </td>
         <td>
-          {Utils.bytesToString(block.diskSize)}
+          {Utils.bytesToString(row.diskUsed)}
         </td>
-        <td>{locations.map(l => <span>{l}<br/></span>)}</td>
+        <td>{row.executors.map(l => <span>{l}<br/></span>)}</td>
+<table class="table">
+  <tr><th>Endpoint</th><th>Meaning</th></tr>
+  <tr>
+    <td><code>/applications</code></td>
+    <td>A list of all applications</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/jobs</code></td>
+    <td>A list of all jobs for a given application</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/jobs/[job-id]</code></td>
+    <td>Details for the given job</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/stages</code></td>
+    <td>A list of all stages for a given application</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/stages/[stage-id]</code></td>
+    <td>A list of all attempts for the given stage</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/stages/[stage-id]/[stage-attempt-id]</code></td>
+    <td>Details for the given stage attempt</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/stages/[stage-id]/[stage-attempt-id]/taskSummary</code></td>
+    <td>Summary metrics of all tasks in the given stage attempt</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/stages/[stage-id]/[stage-attempt-id]/taskList</code></td>
+    <td>A list of all tasks for the given stage attempt</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/executors</code></td>
+    <td>A list of all executors for the given application</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/storage/rdd</code></td>
+    <td>A list of stored RDDs for the given application</td>
+  </tr>
+  <tr>
+    <td><code>/applications/[app-id]/storage/rdd/[rdd-id]</code></td>
+    <td>Details for the storage status of a given RDD</td>
+  </tr>
+</table>
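As a rough illustration of consuming these endpoints programmatically, here is a minimal sketch (not part of the patch) that fetches and parses the application list with the same java.net/json4s plumbing the test suites above use; the port and the `id`/`name` fields are assumptions based on the examples in this document:

    import java.net.URL
    import org.json4s._
    import org.json4s.jackson.JsonMethods

    object JsonApiExample {
      // json4s needs an implicit Formats in scope for extract[...]
      // (assumption: DefaultFormats suffices for these flat fields).
      implicit val formats = DefaultFormats

      def main(args: Array[String]): Unit = {
        // Fetch the application list from a running app's UI
        // (the default port 4040 is an assumption for a local run).
        val raw = scala.io.Source.fromURL(
          new URL("http://localhost:4040/json/v1/applications")).mkString
        // Parse into a json4s AST and pull out individual fields,
        // the same way UISeleniumSuite above inspects the responses.
        val apps = JsonMethods.parse(raw)
        for (app <- apps.children) {
          val id = (app \ "id").extract[String]
          val name = (app \ "name").extract[String]
          println(s"$id -> $name")
        }
      }
    }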
+
+When running on Yarn, each application has multiple attempts, so `[app-id]` is actually
+`[app-id]/[attempt-id]` in all cases.
+
+These endpoints have been strongly versioned to make it easier to develop applications on top.
+In particular, Spark guarantees:
+
+* Endpoints will never be removed within a given version
+* Individual fields will never be removed for any given endpoint
+* New endpoints may be added
+* New fields may be added to existing endpoints
+* New versions of the API may be added in the future at a separate endpoint (e.g., `json/v2`). New versions are *not* required to be backwards compatible.
+* API versions may be dropped, but only after co-existing with a new API version for at least one minor release
+
+Note that even when examining the UI of a running application, the `applications/[app-id]` portion is
+still required, though there is only one application available. E.g., to see the list of jobs for the
+running app, you would go to `http://localhost:4040/json/v1/applications/[app-id]/jobs`. This is to
+keep the paths consistent in both modes.
+
 # Metrics
 
 Spark has a configurable metrics system based on the
diff --git a/pom.xml b/pom.xml
index 12ad7c5be4e40..94cf28332cf47 100644
--- a/pom.xml
+++ b/pom.xml
@@ -623,6 +623,18 @@
       </dependency>
+      <dependency>
+        <groupId>com.sun.jersey</groupId>
+        <artifactId>jersey-server</artifactId>
+        <version>1.9</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>com.sun.jersey</groupId>
+        <artifactId>jersey-core</artifactId>
+        <version>1.9</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
       <dependency>
         <groupId>org.scala-lang</groupId>
         <artifactId>scala-compiler</artifactId>

From f5ff4a84c4c75143086aae7d38730156bee35933 Mon Sep 17 00:00:00 2001
From: Burak Yavuz
Date: Fri, 8 May 2015 11:14:39 -0700
Subject: [PATCH 46/48] [SPARK-7383] [ML] Feature Parity in PySpark for
 ml.features

Implemented python wrappers for Scala functions that don't exist in `ml.features`

Author: Burak Yavuz

Closes #5991 from brkyvz/ml-feat-PR and squashes the following commits:

adcca55 [Burak Yavuz] add regex tokenizer to __all__
b91cb44 [Burak Yavuz] addressed comments
bd39fd2 [Burak Yavuz] remove addition
b82bd7c [Burak Yavuz] Parity in PySpark for ml.features
---
 .../ml/feature/PolynomialExpansion.scala      |   2 +-
 .../apache/spark/ml/feature/Tokenizer.scala   |   2 +-
 python/pyspark/ml/feature.py                  | 797 +++++++++++++++++-
 .../ml/param/_shared_params_code_gen.py       |   4 +-
 python/pyspark/ml/param/shared.py             |  89 ++
 5 files changed, 851 insertions(+), 43 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index 63e190c8aae53..9e6177ca27e4a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -31,7 +31,7 @@ import org.apache.spark.sql.types.DataType
  * which is available at [[http://en.wikipedia.org/wiki/Polynomial_expansion]], "In mathematics, an
  * expansion of a product of sums expresses it as a sum of products by using the fact that
  * multiplication distributes over addition". Take a 2-variable feature vector as an example:
- * `(x, y)`, if we want to expand it with degree 2, then we get `(x, y, x * x, x * y, y * y)`.
+ * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`.
*/ @AlphaComponent class PolynomialExpansion extends UnaryTransformer[Vector, Vector, PolynomialExpansion] { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 2863b7621526e..649c217b16590 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -42,7 +42,7 @@ class Tokenizer extends UnaryTransformer[String, Seq[String], Tokenizer] { /** * :: AlphaComponent :: - * A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default) + * A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default) * or using it to split the text (set matching to false). Optional parameters also allow filtering * tokens using a minimal length. * It returns an array of strings that can be empty. diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 705a368192c24..f35bc1463d51b 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -16,61 +16,72 @@ # from pyspark.rdd import ignore_unicode_prefix -from pyspark.ml.param.shared import HasInputCol, HasInputCols, HasOutputCol, HasNumFeatures +from pyspark.ml.param.shared import * from pyspark.ml.util import keyword_only -from pyspark.ml.wrapper import JavaTransformer +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaTransformer from pyspark.mllib.common import inherit_doc -__all__ = ['Tokenizer', 'HashingTF', 'VectorAssembler'] +__all__ = ['Binarizer', 'HashingTF', 'IDF', 'IDFModel', 'Normalizer', 'OneHotEncoder', + 'PolynomialExpansion', 'RegexTokenizer', 'StandardScaler', 'StandardScalerModel', + 'StringIndexer', 'StringIndexerModel', 'Tokenizer', 'VectorAssembler', 'VectorIndexer', + 'Word2Vec', 'Word2VecModel'] @inherit_doc -@ignore_unicode_prefix -class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): +class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): """ - A tokenizer that converts the input string to lowercase and then - splits it by white spaces. + Binarize a column of continuous features given a threshold. - >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(text="a b c")]).toDF() - >>> tokenizer = Tokenizer(inputCol="text", outputCol="words") - >>> tokenizer.transform(df).head() - Row(text=u'a b c', words=[u'a', u'b', u'c']) - >>> # Change a parameter. - >>> tokenizer.setParams(outputCol="tokens").transform(df).head() - Row(text=u'a b c', tokens=[u'a', u'b', u'c']) - >>> # Temporarily modify a parameter. - >>> tokenizer.transform(df, {tokenizer.outputCol: "words"}).head() - Row(text=u'a b c', words=[u'a', u'b', u'c']) - >>> tokenizer.transform(df).head() - Row(text=u'a b c', tokens=[u'a', u'b', u'c']) - >>> # Must use keyword arguments to specify params. - >>> tokenizer.setParams("text") - Traceback (most recent call last): - ... - TypeError: Method setParams forces keyword arguments. 
+    >>> df = sqlContext.createDataFrame([(0.5,)], ["values"])
+    >>> binarizer = Binarizer(threshold=1.0, inputCol="values", outputCol="features")
+    >>> binarizer.transform(df).head().features
+    0.0
+    >>> binarizer.setParams(outputCol="freqs").transform(df).head().freqs
+    0.0
+    >>> params = {binarizer.threshold: -0.5, binarizer.outputCol: "vector"}
+    >>> binarizer.transform(df, params).head().vector
+    1.0
     """
 
-    _java_class = "org.apache.spark.ml.feature.Tokenizer"
+    _java_class = "org.apache.spark.ml.feature.Binarizer"
+    # a placeholder to make it appear in the generated doc
+    threshold = Param(Params._dummy(), "threshold",
+                      "threshold used to binarize continuous features")
 
     @keyword_only
-    def __init__(self, inputCol=None, outputCol=None):
+    def __init__(self, threshold=0.0, inputCol=None, outputCol=None):
         """
-        __init__(self, inputCol=None, outputCol=None)
+        __init__(self, threshold=0.0, inputCol=None, outputCol=None)
         """
-        super(Tokenizer, self).__init__()
+        super(Binarizer, self).__init__()
+        self.threshold = Param(self, "threshold",
+                               "threshold used to binarize continuous features")
+        self._setDefault(threshold=0.0)
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
 
     @keyword_only
-    def setParams(self, inputCol=None, outputCol=None):
+    def setParams(self, threshold=0.0, inputCol=None, outputCol=None):
         """
-        setParams(self, inputCol="input", outputCol="output")
-        Sets params for this Tokenizer.
+        setParams(self, threshold=0.0, inputCol=None, outputCol=None)
+        Sets params for this Binarizer.
         """
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    def setThreshold(self, value):
+        """
+        Sets the value of :py:attr:`threshold`.
+        """
+        self.paramMap[self.threshold] = value
+        return self
+
+    def getThreshold(self):
+        """
+        Gets the value of threshold or its default value.
+        """
+        return self.getOrDefault(self.threshold)
+
 
 @inherit_doc
 class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
@@ -78,8 +89,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
     Maps a sequence of terms to their term frequencies using the
     hashing trick.
 
-    >>> from pyspark.sql import Row
-    >>> df = sc.parallelize([Row(words=["a", "b", "c"])]).toDF()
+    >>> df = sqlContext.createDataFrame([(["a", "b", "c"],)], ["words"])
     >>> hashingTF = HashingTF(numFeatures=10, inputCol="words", outputCol="features")
     >>> hashingTF.transform(df).head().features
     SparseVector(10, {7: 1.0, 8: 1.0, 9: 1.0})
@@ -112,13 +122,520 @@ def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None):
         return self._set(**kwargs)
 
 
+@inherit_doc
+class IDF(JavaEstimator, HasInputCol, HasOutputCol):
+    """
+    Compute the Inverse Document Frequency (IDF) given a collection of documents.
+
+    >>> from pyspark.mllib.linalg import DenseVector
+    >>> df = sqlContext.createDataFrame([(DenseVector([1.0, 2.0]),),
+    ...
(DenseVector([0.0, 1.0]),), (DenseVector([3.0, 0.2]),)], ["tf"]) + >>> idf = IDF(minDocFreq=3, inputCol="tf", outputCol="idf") + >>> idf.fit(df).transform(df).head().idf + DenseVector([0.0, 0.0]) + >>> idf.setParams(outputCol="freqs").fit(df).transform(df).collect()[1].freqs + DenseVector([0.0, 0.0]) + >>> params = {idf.minDocFreq: 1, idf.outputCol: "vector"} + >>> idf.fit(df, params).transform(df).head().vector + DenseVector([0.2877, 0.0]) + """ + + _java_class = "org.apache.spark.ml.feature.IDF" + + # a placeholder to make it appear in the generated doc + minDocFreq = Param(Params._dummy(), "minDocFreq", + "minimum of documents in which a term should appear for filtering") + + @keyword_only + def __init__(self, minDocFreq=0, inputCol=None, outputCol=None): + """ + __init__(self, minDocFreq=0, inputCol=None, outputCol=None) + """ + super(IDF, self).__init__() + self.minDocFreq = Param(self, "minDocFreq", + "minimum of documents in which a term should appear for filtering") + self._setDefault(minDocFreq=0) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, minDocFreq=0, inputCol=None, outputCol=None): + """ + setParams(self, minDocFreq=0, inputCol=None, outputCol=None) + Sets params for this IDF. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setMinDocFreq(self, value): + """ + Sets the value of :py:attr:`minDocFreq`. + """ + self.paramMap[self.minDocFreq] = value + return self + + def getMinDocFreq(self): + """ + Gets the value of minDocFreq or its default value. + """ + return self.getOrDefault(self.minDocFreq) + + +class IDFModel(JavaModel): + """ + Model fitted by IDF. + """ + + +@inherit_doc +class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + Normalize a vector to have unit norm using the given p-norm. + + >>> from pyspark.mllib.linalg import Vectors + >>> svec = Vectors.sparse(4, {1: 4.0, 3: 3.0}) + >>> df = sqlContext.createDataFrame([(Vectors.dense([3.0, -4.0]), svec)], ["dense", "sparse"]) + >>> normalizer = Normalizer(p=2.0, inputCol="dense", outputCol="features") + >>> normalizer.transform(df).head().features + DenseVector([0.6, -0.8]) + >>> normalizer.setParams(inputCol="sparse", outputCol="freqs").transform(df).head().freqs + SparseVector(4, {1: 0.8, 3: 0.6}) + >>> params = {normalizer.p: 1.0, normalizer.inputCol: "dense", normalizer.outputCol: "vector"} + >>> normalizer.transform(df, params).head().vector + DenseVector([0.4286, -0.5714]) + """ + + # a placeholder to make it appear in the generated doc + p = Param(Params._dummy(), "p", "the p norm value.") + + _java_class = "org.apache.spark.ml.feature.Normalizer" + + @keyword_only + def __init__(self, p=2.0, inputCol=None, outputCol=None): + """ + __init__(self, p=2.0, inputCol=None, outputCol=None) + """ + super(Normalizer, self).__init__() + self.p = Param(self, "p", "the p norm value.") + self._setDefault(p=2.0) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, p=2.0, inputCol=None, outputCol=None): + """ + setParams(self, p=2.0, inputCol=None, outputCol=None) + Sets params for this Normalizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setP(self, value): + """ + Sets the value of :py:attr:`p`. + """ + self.paramMap[self.p] = value + return self + + def getP(self): + """ + Gets the value of p or its default value. 
+ """ + return self.getOrDefault(self.p) + + +@inherit_doc +class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): + """ + A one-hot encoder that maps a column of label indices to a column of binary vectors, with + at most a single one-value. By default, the binary vector has an element for each category, so + with 5 categories, an input value of 2.0 would map to an output vector of + (0.0, 0.0, 1.0, 0.0, 0.0). If includeFirst is set to false, the first category is omitted, so + the output vector for the previous example would be (0.0, 1.0, 0.0, 0.0) and an input value + of 0.0 would map to a vector of all zeros. Including the first category makes the vector columns + linearly dependent because they sum up to one. + + TODO: This method requires the use of StringIndexer first. Decouple them. + + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> model = stringIndexer.fit(stringIndDf) + >>> td = model.transform(stringIndDf) + >>> encoder = OneHotEncoder(includeFirst=False, inputCol="indexed", outputCol="features") + >>> encoder.transform(td).head().features + SparseVector(2, {}) + >>> encoder.setParams(outputCol="freqs").transform(td).head().freqs + SparseVector(2, {}) + >>> params = {encoder.includeFirst: True, encoder.outputCol: "test"} + >>> encoder.transform(td, params).head().test + SparseVector(3, {0: 1.0}) + """ + + _java_class = "org.apache.spark.ml.feature.OneHotEncoder" + + # a placeholder to make it appear in the generated doc + includeFirst = Param(Params._dummy(), "includeFirst", "include first category") + + @keyword_only + def __init__(self, includeFirst=True, inputCol=None, outputCol=None): + """ + __init__(self, includeFirst=True, inputCol=None, outputCol=None) + """ + super(OneHotEncoder, self).__init__() + self.includeFirst = Param(self, "includeFirst", "include first category") + self._setDefault(includeFirst=True) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, includeFirst=True, inputCol=None, outputCol=None): + """ + setParams(self, includeFirst=True, inputCol=None, outputCol=None) + Sets params for this OneHotEncoder. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setIncludeFirst(self, value): + """ + Sets the value of :py:attr:`includeFirst`. + """ + self.paramMap[self.includeFirst] = value + return self + + def getIncludeFirst(self): + """ + Gets the value of includeFirst or its default value. + """ + return self.getOrDefault(self.includeFirst) + + +@inherit_doc +class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): + """ + Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion, + which is available at `http://en.wikipedia.org/wiki/Polynomial_expansion`, "In mathematics, an + expansion of a product of sums expresses it as a sum of products by using the fact that + multiplication distributes over addition". Take a 2-variable feature vector as an example: + `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. 
+ + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([(Vectors.dense([0.5, 2.0]),)], ["dense"]) + >>> px = PolynomialExpansion(degree=2, inputCol="dense", outputCol="expanded") + >>> px.transform(df).head().expanded + DenseVector([0.5, 0.25, 2.0, 1.0, 4.0]) + >>> px.setParams(outputCol="test").transform(df).head().test + DenseVector([0.5, 0.25, 2.0, 1.0, 4.0]) + """ + + _java_class = "org.apache.spark.ml.feature.PolynomialExpansion" + + # a placeholder to make it appear in the generated doc + degree = Param(Params._dummy(), "degree", "the polynomial degree to expand (>= 1)") + + @keyword_only + def __init__(self, degree=2, inputCol=None, outputCol=None): + """ + __init__(self, degree=2, inputCol=None, outputCol=None) + """ + super(PolynomialExpansion, self).__init__() + self.degree = Param(self, "degree", "the polynomial degree to expand (>= 1)") + self._setDefault(degree=2) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, degree=2, inputCol=None, outputCol=None): + """ + setParams(self, degree=2, inputCol=None, outputCol=None) + Sets params for this PolynomialExpansion. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setDegree(self, value): + """ + Sets the value of :py:attr:`degree`. + """ + self.paramMap[self.degree] = value + return self + + def getDegree(self): + """ + Gets the value of degree or its default value. + """ + return self.getOrDefault(self.degree) + + +@inherit_doc +@ignore_unicode_prefix +class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default) + or using it to split the text (set matching to false). Optional parameters also allow filtering + tokens using a minimal length. + It returns an array of strings that can be empty. + + >>> df = sqlContext.createDataFrame([("a b c",)], ["text"]) + >>> reTokenizer = RegexTokenizer(inputCol="text", outputCol="words") + >>> reTokenizer.transform(df).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> # Change a parameter. + >>> reTokenizer.setParams(outputCol="tokens").transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Temporarily modify a parameter. + >>> reTokenizer.transform(df, {reTokenizer.outputCol: "words"}).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> reTokenizer.transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Must use keyword arguments to specify params. + >>> reTokenizer.setParams("text") + Traceback (most recent call last): + ... + TypeError: Method setParams forces keyword arguments. 
+ """ + + _java_class = "org.apache.spark.ml.feature.RegexTokenizer" + # a placeholder to make it appear in the generated doc + minTokenLength = Param(Params._dummy(), "minTokenLength", "minimum token length (>= 0)") + gaps = Param(Params._dummy(), "gaps", "Set regex to match gaps or tokens") + pattern = Param(Params._dummy(), "pattern", "regex pattern used for tokenizing") + + @keyword_only + def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + inputCol=None, outputCol=None): + """ + __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + inputCol=None, outputCol=None) + """ + super(RegexTokenizer, self).__init__() + self.minTokenLength = Param(self, "minLength", "minimum token length (>= 0)") + self.gaps = Param(self, "gaps", "Set regex to match gaps or tokens") + self.pattern = Param(self, "pattern", "regex pattern used for tokenizing") + self._setDefault(minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + inputCol=None, outputCol=None): + """ + setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + inputCol="input", outputCol="output") + Sets params for this RegexTokenizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setMinTokenLength(self, value): + """ + Sets the value of :py:attr:`minTokenLength`. + """ + self.paramMap[self.minTokenLength] = value + return self + + def getMinTokenLength(self): + """ + Gets the value of minTokenLength or its default value. + """ + return self.getOrDefault(self.minTokenLength) + + def setGaps(self, value): + """ + Sets the value of :py:attr:`gaps`. + """ + self.paramMap[self.gaps] = value + return self + + def getGaps(self): + """ + Gets the value of gaps or its default value. + """ + return self.getOrDefault(self.gaps) + + def setPattern(self, value): + """ + Sets the value of :py:attr:`pattern`. + """ + self.paramMap[self.pattern] = value + return self + + def getPattern(self): + """ + Gets the value of pattern or its default value. + """ + return self.getOrDefault(self.pattern) + + +@inherit_doc +class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): + """ + Standardizes features by removing the mean and scaling to unit variance using column summary + statistics on the samples in the training set. 
+ + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"]) + >>> standardScaler = StandardScaler(inputCol="a", outputCol="scaled") + >>> model = standardScaler.fit(df) + >>> model.transform(df).collect()[1].scaled + DenseVector([1.4142]) + """ + + _java_class = "org.apache.spark.ml.feature.StandardScaler" + + # a placeholder to make it appear in the generated doc + withMean = Param(Params._dummy(), "withMean", "Center data with mean") + withStd = Param(Params._dummy(), "withStd", "Scale to unit standard deviation") + + @keyword_only + def __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None): + """ + __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None) + """ + super(StandardScaler, self).__init__() + self.withMean = Param(self, "withMean", "Center data with mean") + self.withStd = Param(self, "withStd", "Scale to unit standard deviation") + self._setDefault(withMean=False, withStd=True) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None): + """ + setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None) + Sets params for this StandardScaler. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setWithMean(self, value): + """ + Sets the value of :py:attr:`withMean`. + """ + self.paramMap[self.withMean] = value + return self + + def getWithMean(self): + """ + Gets the value of withMean or its default value. + """ + return self.getOrDefault(self.withMean) + + def setWithStd(self, value): + """ + Sets the value of :py:attr:`withStd`. + """ + self.paramMap[self.withStd] = value + return self + + def getWithStd(self): + """ + Gets the value of withStd or its default value. + """ + return self.getOrDefault(self.withStd) + + +class StandardScalerModel(JavaModel): + """ + Model fitted by StandardScaler. + """ + + +@inherit_doc +class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol): + """ + A label indexer that maps a string column of labels to an ML column of label indices. + If the input column is numeric, we cast it to string and index the string values. + The indices are in [0, numLabels), ordered by label frequencies. + So the most frequent label gets index 0. + + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> model = stringIndexer.fit(stringIndDf) + >>> td = model.transform(stringIndDf) + >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]), + ... key=lambda x: x[0]) + [(0, 0.0), (1, 2.0), (2, 1.0), (3, 0.0), (4, 0.0), (5, 1.0)] + """ + + _java_class = "org.apache.spark.ml.feature.StringIndexer" + + @keyword_only + def __init__(self, inputCol=None, outputCol=None): + """ + __init__(self, inputCol=None, outputCol=None) + """ + super(StringIndexer, self).__init__() + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, inputCol=None, outputCol=None): + """ + setParams(self, inputCol=None, outputCol=None) + Sets params for this StringIndexer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + +class StringIndexerModel(JavaModel): + """ + Model fitted by StringIndexer. 
+ """ + + +@inherit_doc +@ignore_unicode_prefix +class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + A tokenizer that converts the input string to lowercase and then + splits it by white spaces. + + >>> df = sqlContext.createDataFrame([("a b c",)], ["text"]) + >>> tokenizer = Tokenizer(inputCol="text", outputCol="words") + >>> tokenizer.transform(df).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> # Change a parameter. + >>> tokenizer.setParams(outputCol="tokens").transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Temporarily modify a parameter. + >>> tokenizer.transform(df, {tokenizer.outputCol: "words"}).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> tokenizer.transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Must use keyword arguments to specify params. + >>> tokenizer.setParams("text") + Traceback (most recent call last): + ... + TypeError: Method setParams forces keyword arguments. + """ + + _java_class = "org.apache.spark.ml.feature.Tokenizer" + + @keyword_only + def __init__(self, inputCol=None, outputCol=None): + """ + __init__(self, inputCol=None, outputCol=None) + """ + super(Tokenizer, self).__init__() + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, inputCol=None, outputCol=None): + """ + setParams(self, inputCol="input", outputCol="output") + Sets params for this Tokenizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + @inherit_doc class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): """ A feature transformer that merges multiple columns into a vector column. - >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(a=1, b=0, c=3)]).toDF() + >>> df = sqlContext.createDataFrame([(1, 0, 3)], ["a", "b", "c"]) >>> vecAssembler = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features") >>> vecAssembler.transform(df).head().features DenseVector([1.0, 0.0, 3.0]) @@ -137,7 +654,6 @@ def __init__(self, inputCols=None, outputCol=None): __init__(self, inputCols=None, outputCol=None) """ super(VectorAssembler, self).__init__() - self._setDefault() kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -151,10 +667,208 @@ def setParams(self, inputCols=None, outputCol=None): return self._set(**kwargs) +@inherit_doc +class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): + """ + Class for indexing categorical feature columns in a dataset of [[Vector]]. + + This has 2 usage modes: + - Automatically identify categorical features (default behavior) + - This helps process a dataset of unknown vectors into a dataset with some continuous + features and some categorical features. The choice between continuous and categorical + is based upon a maxCategories parameter. + - Set maxCategories to the maximum number of categorical any categorical feature should + have. + - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}. + If maxCategories = 2, then feature 0 will be declared categorical and use indices {0, 1}, + and feature 1 will be declared continuous. + - Index all features, if all features are categorical + - If maxCategories is set to be very large, then this will build an index of unique + values for all features. + - Warning: This can cause problems if features are continuous since this will collect ALL + unique values to the driver. + - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}. 
+ If maxCategories >= 3, then both features will be declared categorical. + + This returns a model which can transform categorical features to use 0-based indices. + + Index stability: + - This is not guaranteed to choose the same category index across multiple runs. + - If a categorical feature includes value 0, then this is guaranteed to map value 0 to + index 0. This maintains vector sparsity. + - More stability may be added in the future. + + TODO: Future extensions: The following functionality is planned for the future: + - Preserve metadata in transform; if a feature's metadata is already present, + do not recompute. + - Specify certain features to not index, either via a parameter or via existing metadata. + - Add warning if a categorical feature has only 1 category. + - Add option for allowing unknown categories. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([(Vectors.dense([-1.0, 0.0]),), + ... (Vectors.dense([0.0, 1.0]),), (Vectors.dense([0.0, 2.0]),)], ["a"]) + >>> indexer = VectorIndexer(maxCategories=2, inputCol="a", outputCol="indexed") + >>> model = indexer.fit(df) + >>> model.transform(df).head().indexed + DenseVector([1.0, 0.0]) + >>> indexer.setParams(outputCol="test").fit(df).transform(df).collect()[1].test + DenseVector([0.0, 1.0]) + >>> params = {indexer.maxCategories: 3, indexer.outputCol: "vector"} + >>> model2 = indexer.fit(df, params) + >>> model2.transform(df).head().vector + DenseVector([1.0, 0.0]) + """ + + _java_class = "org.apache.spark.ml.feature.VectorIndexer" + # a placeholder to make it appear in the generated doc + maxCategories = Param(Params._dummy(), "maxCategories", + "Threshold for the number of values a categorical feature can take " + + "(>= 2). If a feature is found to have > maxCategories values, then " + + "it is declared continuous.") + + @keyword_only + def __init__(self, maxCategories=20, inputCol=None, outputCol=None): + """ + __init__(self, maxCategories=20, inputCol=None, outputCol=None) + """ + super(VectorIndexer, self).__init__() + self.maxCategories = Param(self, "maxCategories", + "Threshold for the number of values a categorical feature " + + "can take (>= 2). If a feature is found to have " + + "> maxCategories values, then it is declared continuous.") + self._setDefault(maxCategories=20) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, maxCategories=20, inputCol=None, outputCol=None): + """ + setParams(self, maxCategories=20, inputCol=None, outputCol=None) + Sets params for this VectorIndexer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setMaxCategories(self, value): + """ + Sets the value of :py:attr:`maxCategories`. + """ + self.paramMap[self.maxCategories] = value + return self + + def getMaxCategories(self): + """ + Gets the value of maxCategories or its default value. + """ + return self.getOrDefault(self.maxCategories) + + +@inherit_doc +@ignore_unicode_prefix +class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol): + """ + Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further + natural language processing or machine learning process. 
+ + >>> sent = ("a b " * 100 + "a c " * 10).split(" ") + >>> doc = sqlContext.createDataFrame([(sent,), (sent,)], ["sentence"]) + >>> model = Word2Vec(vectorSize=5, seed=42, inputCol="sentence", outputCol="model").fit(doc) + >>> model.transform(doc).head().model + DenseVector([-0.0422, -0.5138, -0.2546, 0.6885, 0.276]) + """ + + _java_class = "org.apache.spark.ml.feature.Word2Vec" + # a placeholder to make it appear in the generated doc + vectorSize = Param(Params._dummy(), "vectorSize", + "the dimension of codes after transforming from words") + numPartitions = Param(Params._dummy(), "numPartitions", + "number of partitions for sentences of words") + minCount = Param(Params._dummy(), "minCount", + "the minimum number of times a token must appear to be included in the " + + "word2vec model's vocabulary") + + @keyword_only + def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=42, inputCol=None, outputCol=None): + """ + __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=42, inputCol=None, outputCol=None) + """ + super(Word2Vec, self).__init__() + self.vectorSize = Param(self, "vectorSize", + "the dimension of codes after transforming from words") + self.numPartitions = Param(self, "numPartitions", + "number of partitions for sentences of words") + self.minCount = Param(self, "minCount", + "the minimum number of times a token must appear to be included " + + "in the word2vec model's vocabulary") + self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=42) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=42, inputCol=None, outputCol=None): + """ + setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, + inputCol=None, outputCol=None) + Sets params for this Word2Vec. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setVectorSize(self, value): + """ + Sets the value of :py:attr:`vectorSize`. + """ + self.paramMap[self.vectorSize] = value + return self + + def getVectorSize(self): + """ + Gets the value of vectorSize or its default value. + """ + return self.getOrDefault(self.vectorSize) + + def setNumPartitions(self, value): + """ + Sets the value of :py:attr:`numPartitions`. + """ + self.paramMap[self.numPartitions] = value + return self + + def getNumPartitions(self): + """ + Gets the value of numPartitions or its default value. + """ + return self.getOrDefault(self.numPartitions) + + def setMinCount(self, value): + """ + Sets the value of :py:attr:`minCount`. + """ + self.paramMap[self.minCount] = value + return self + + def getMinCount(self): + """ + Gets the value of minCount or its default value. + """ + return self.getOrDefault(self.minCount) + + +class Word2VecModel(JavaModel): + """ + Model fitted by Word2Vec. 
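The setter/getter pairs above all follow the same pattern: setters write into the instance's paramMap and return self for chaining, while getters fall back to the declared defaults. A hedged usage sketch, again assuming the doctests' `sqlContext`:

    from pyspark.ml.feature import Word2Vec

    doc = sqlContext.createDataFrame([(("a b " * 10).split(" "),)], ["sentence"])
    w2v = Word2Vec(inputCol="sentence", outputCol="vec")
    # Chained setters; each records the value in the paramMap.
    w2v.setVectorSize(10).setNumPartitions(2).setMinCount(1)
    w2v.getVectorSize()   # 10, explicitly set
    w2v.getMaxIter()      # 1, the default inherited from HasMaxIter
    model = w2v.fit(doc)
    model.transform(doc).head().vec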
+ """ + + if __name__ == "__main__": import doctest from pyspark.context import SparkContext - from pyspark.sql import SQLContext + from pyspark.sql import Row, SQLContext globs = globals().copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: @@ -162,8 +876,11 @@ def setParams(self, inputCols=None, outputCol=None): sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - (failure_count, test_count) = doctest.testmod( - globs=globs, optionflags=doctest.ELLIPSIS) + testData = sc.parallelize([Row(id=0, label="a"), Row(id=1, label="b"), + Row(id=2, label="c"), Row(id=3, label="a"), + Row(id=4, label="a"), Row(id=5, label="c")], 2) + globs['stringIndDf'] = sqlContext.createDataFrame(testData) + (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) sc.stop() if failure_count: exit(-1) diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index c1c8e921dda87..ee901f25847bc 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -97,7 +97,9 @@ def get$Name(self): ("inputCol", "input column name", None), ("inputCols", "input column names", None), ("outputCol", "output column name", None), - ("numFeatures", "number of features", None)] + ("seed", "random seed", None), + ("tol", "the convergence tolerance for iterative algorithms", None), + ("stepSize", "Step size to be used for each iteration of optimization.", None)] code = [] for name, doc, defaultValueStr in shared: code.append(_gen_param_code(name, doc, defaultValueStr)) diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index aaf80f00085bf..5e7529c1dcf1e 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -308,3 +308,92 @@ def getNumFeatures(self): Gets the value of numFeatures or its default value. """ return self.getOrDefault(self.numFeatures) + + +class HasSeed(Params): + """ + Mixin for param seed: random seed. + """ + + # a placeholder to make it appear in the generated doc + seed = Param(Params._dummy(), "seed", "random seed") + + def __init__(self): + super(HasSeed, self).__init__() + #: param for random seed + self.seed = Param(self, "seed", "random seed") + if None is not None: + self._setDefault(seed=None) + + def setSeed(self, value): + """ + Sets the value of :py:attr:`seed`. + """ + self.paramMap[self.seed] = value + return self + + def getSeed(self): + """ + Gets the value of seed or its default value. + """ + return self.getOrDefault(self.seed) + + +class HasTol(Params): + """ + Mixin for param tol: the convergence tolerance for iterative algorithms. + """ + + # a placeholder to make it appear in the generated doc + tol = Param(Params._dummy(), "tol", "the convergence tolerance for iterative algorithms") + + def __init__(self): + super(HasTol, self).__init__() + #: param for the convergence tolerance for iterative algorithms + self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms") + if None is not None: + self._setDefault(tol=None) + + def setTol(self, value): + """ + Sets the value of :py:attr:`tol`. + """ + self.paramMap[self.tol] = value + return self + + def getTol(self): + """ + Gets the value of tol or its default value. + """ + return self.getOrDefault(self.tol) + + +class HasStepSize(Params): + """ + Mixin for param stepSize: Step size to be used for each iteration of optimization.. 
+ """ + + # a placeholder to make it appear in the generated doc + stepSize = Param(Params._dummy(), "stepSize", + "Step size to be used for each iteration of optimization.") + + def __init__(self): + super(HasStepSize, self).__init__() + #: param for Step size to be used for each iteration of optimization. + self.stepSize = Param(self, "stepSize", + "Step size to be used for each iteration of optimization.") + if None is not None: + self._setDefault(stepSize=None) + + def setStepSize(self, value): + """ + Sets the value of :py:attr:`stepSize`. + """ + self.paramMap[self.stepSize] = value + return self + + def getStepSize(self): + """ + Gets the value of stepSize or its default value. + """ + return self.getOrDefault(self.stepSize) From 65afd3ce8b8a0b00f4ea8294eac14b72e964872d Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Fri, 8 May 2015 11:16:04 -0700 Subject: [PATCH 47/48] [SPARK-7474] [MLLIB] update ParamGridBuilder doctest Multiline commands are properly handled in this PR. oefirouz ![screen shot 2015-05-07 at 10 53 25 pm](https://cloud.githubusercontent.com/assets/829644/7531290/02ad2fd4-f50c-11e4-8c04-e58d1a61ad69.png) Author: Xiangrui Meng Closes #6001 from mengxr/SPARK-7474 and squashes the following commits: b94b11d [Xiangrui Meng] update ParamGridBuilder doctest --- python/pyspark/ml/tuning.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 1e04c37fca53e..28e3727f2c064 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -27,24 +27,22 @@ class ParamGridBuilder(object): - """ + r""" Builder for a param grid used in grid search-based model selection. - >>> from classification import LogisticRegression + >>> from pyspark.ml.classification import LogisticRegression >>> lr = LogisticRegression() - >>> output = ParamGridBuilder().baseOn({lr.labelCol: 'l'}) \ - .baseOn([lr.predictionCol, 'p']) \ - .addGrid(lr.regParam, [1.0, 2.0, 3.0]) \ - .addGrid(lr.maxIter, [1, 5]) \ - .addGrid(lr.featuresCol, ['f']) \ - .build() - >>> expected = [ \ -{lr.regParam: 1.0, lr.featuresCol: 'f', lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 2.0, lr.featuresCol: 'f', lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 3.0, lr.featuresCol: 'f', lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 1.0, lr.featuresCol: 'f', lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 2.0, lr.featuresCol: 'f', lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 3.0, lr.featuresCol: 'f', lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}] + >>> output = ParamGridBuilder() \ + ... .baseOn({lr.labelCol: 'l'}) \ + ... .baseOn([lr.predictionCol, 'p']) \ + ... .addGrid(lr.regParam, [1.0, 2.0]) \ + ... .addGrid(lr.maxIter, [1, 5]) \ + ... .build() + >>> expected = [ + ... {lr.regParam: 1.0, lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, + ... {lr.regParam: 2.0, lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, + ... {lr.regParam: 1.0, lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}, + ... 
{lr.regParam: 2.0, lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}] >>> len(output) == len(expected) True >>> all([m in expected for m in output]) From 008a60dd371e76819d8e08ab638cac7b3a48c9fc Mon Sep 17 00:00:00 2001 From: hqzizania Date: Fri, 8 May 2015 11:25:04 -0700 Subject: [PATCH 48/48] [SPARK-6824] Fill the docs for DataFrame API in SparkR This patch also removes the RDD docs from being built as a part of roxygen just by the method to delete " ' '" of " \#' ". Author: hqzizania Author: qhuang Closes #5969 from hqzizania/R1 and squashes the following commits: 6d27696 [qhuang] fixes in NAMESPACE eb4b095 [qhuang] remove more docs 6394579 [qhuang] remove RDD docs in generics.R 6813860 [hqzizania] Fill the docs for DataFrame API in SparkR 857220f [hqzizania] remove the pairRDD docs from being built as a part of roxygen c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen --- R/pkg/DESCRIPTION | 2 +- R/pkg/NAMESPACE | 4 - R/pkg/R/DataFrame.R | 95 +-- R/pkg/R/RDD.R | 1546 +++++++++++++++++++++--------------------- R/pkg/R/SQLContext.R | 64 +- R/pkg/R/broadcast.R | 64 +- R/pkg/R/context.R | 240 +++---- R/pkg/R/generics.R | 318 ++++----- R/pkg/R/pairRDD.R | 886 ++++++++++++------------ 9 files changed, 1610 insertions(+), 1609 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 1c1779a763c7e..efc85bbc4b316 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -15,11 +15,11 @@ Suggests: Description: R frontend for Spark License: Apache License (== 2.0) Collate: + 'schema.R' 'generics.R' 'jobj.R' 'RDD.R' 'pairRDD.R' - 'schema.R' 'column.R' 'group.R' 'DataFrame.R' diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 3fb92be0940b7..7611f479a628b 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -26,7 +26,6 @@ exportMethods("cache", "intersect", "isLocal", "join", - "length", "limit", "orderBy", "names", @@ -101,9 +100,6 @@ export("cacheTable", "tables", "uncacheTable") -export("sparkRSQL.init", - "sparkRHive.init") - export("structField", "structField.jobj", "structField.character", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 47d92f141cc7d..354642e7bc307 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) { #' @rdname DataFrame #' @export +#' +#' @param sdf A Java object reference to the backing Scala DataFrame +#' @param isCached TRUE if the dataFrame is cached dataFrame <- function(sdf, isCached = FALSE) { new("DataFrame", sdf, isCached) } @@ -244,7 +247,7 @@ setMethod("columns", }) #' @rdname columns -#' @export +#' @aliases names,DataFrame,function-method setMethod("names", signature(x = "DataFrame"), function(x) { @@ -399,23 +402,23 @@ setMethod("repartition", dataFrame(sdf) }) -#' toJSON -#' -#' Convert the rows of a DataFrame into JSON objects and return an RDD where -#' each element contains a JSON string. -#' -#' @param x A SparkSQL DataFrame -#' @return A StringRRDD of JSON objects -#' @rdname tojson -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' newRDD <- toJSON(df) -#'} +# toJSON +# +# Convert the rows of a DataFrame into JSON objects and return an RDD where +# each element contains a JSON string. 
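For comparison, the SparkR DataFrame methods documented in this patch map directly onto the PySpark DataFrame API; a brief sketch, assuming a live `sqlContext` (the data is illustrative):

    df = sqlContext.createDataFrame([(1, "a"), (2, "b")], ["age", "name"])
    df.columns                        # ['age', 'name']
    df.limit(1).collect()             # at most one row
    df.take(1)                        # first row(s) as a local list
    df.filter(df.age > 1).collect()   # where() is an alias for filter()
    df.toJSON().collect()             # RDD of JSON strings, one per row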
+# +#@param x A SparkSQL DataFrame +# @return A StringRRDD of JSON objects +# @rdname tojson +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlCtx <- sparkRSQL.init(sc) +# path <- "path/to/file.json" +# df <- jsonFile(sqlCtx, path) +# newRDD <- toJSON(df) +#} setMethod("toJSON", signature(x = "DataFrame"), function(x) { @@ -578,8 +581,8 @@ setMethod("limit", dataFrame(res) }) -# Take the first NUM rows of a DataFrame and return a the results as a data.frame - +#' Take the first NUM rows of a DataFrame and return a the results as a data.frame +#' #' @rdname take #' @export #' @examples @@ -644,22 +647,22 @@ setMethod("first", take(x, 1) }) -#' toRDD() -#' -#' Converts a Spark DataFrame to an RDD while preserving column names. -#' -#' @param x A Spark DataFrame -#' -#' @rdname DataFrame -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' rdd <- toRDD(df) -#' } +# toRDD() +# +# Converts a Spark DataFrame to an RDD while preserving column names. +# +# @param x A Spark DataFrame +# +# @rdname DataFrame +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlCtx <- sparkRSQL.init(sc) +# path <- "path/to/file.json" +# df <- jsonFile(sqlCtx, path) +# rdd <- toRDD(df) +# } setMethod("toRDD", signature(x = "DataFrame"), function(x) { @@ -706,6 +709,7 @@ setMethod("groupBy", #' #' Compute aggregates by specifying a list of columns #' +#' @param x a DataFrame #' @rdname DataFrame #' @export setMethod("agg", @@ -721,7 +725,7 @@ setMethod("agg", # the requested map function. # ################################################################################### -#' @rdname lapply +# @rdname lapply setMethod("lapply", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { @@ -729,14 +733,14 @@ setMethod("lapply", lapply(rdd, FUN) }) -#' @rdname lapply +# @rdname lapply setMethod("map", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { lapply(X, FUN) }) -#' @rdname flatMap +# @rdname flatMap setMethod("flatMap", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { @@ -744,7 +748,7 @@ setMethod("flatMap", flatMap(rdd, FUN) }) -#' @rdname lapplyPartition +# @rdname lapplyPartition setMethod("lapplyPartition", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { @@ -752,14 +756,14 @@ setMethod("lapplyPartition", lapplyPartition(rdd, FUN) }) -#' @rdname lapplyPartition +# @rdname lapplyPartition setMethod("mapPartitions", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { lapplyPartition(X, FUN) }) -#' @rdname foreach +# @rdname foreach setMethod("foreach", signature(x = "DataFrame", func = "function"), function(x, func) { @@ -767,7 +771,7 @@ setMethod("foreach", foreach(rdd, func) }) -#' @rdname foreach +# @rdname foreach setMethod("foreachPartition", signature(x = "DataFrame", func = "function"), function(x, func) { @@ -788,6 +792,7 @@ setMethod("$", signature(x = "DataFrame"), getColumn(x, name) }) +#' @rdname select setMethod("$<-", signature(x = "DataFrame"), function(x, name, value) { stopifnot(class(value) == "Column" || is.null(value)) @@ -1009,7 +1014,7 @@ setMethod("sortDF", }) #' @rdname sortDF -#' @export +#' @aliases orderBy,DataFrame,function-method setMethod("orderBy", signature(x = "DataFrame", col = "characterOrColumn"), function(x, col) { @@ -1046,7 +1051,7 @@ setMethod("filter", }) #' @rdname filter -#' @export +#' @aliases where,DataFrame,function-method setMethod("where", signature(x = 
"DataFrame", condition = "characterOrColumn"), function(x, condition) { diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index d1018c2361670..73999a6737032 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -19,16 +19,16 @@ setOldClass("jobj") -#' @title S4 class that represents an RDD -#' @description RDD can be created using functions like -#' \code{parallelize}, \code{textFile} etc. -#' @rdname RDD -#' @seealso parallelize, textFile -#' -#' @slot env An R environment that stores bookkeeping states of the RDD -#' @slot jrdd Java object reference to the backing JavaRDD -#' to an RDD -#' @export +# @title S4 class that represents an RDD +# @description RDD can be created using functions like +# \code{parallelize}, \code{textFile} etc. +# @rdname RDD +# @seealso parallelize, textFile +# +# @slot env An R environment that stores bookkeeping states of the RDD +# @slot jrdd Java object reference to the backing JavaRDD +# to an RDD +# @export setClass("RDD", slots = list(env = "environment", jrdd = "jobj")) @@ -108,14 +108,14 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) .Object }) -#' @rdname RDD -#' @export -#' -#' @param jrdd Java object reference to the backing JavaRDD -#' @param serializedMode Use "byte" if the RDD stores data serialized in R, "string" if the RDD -#' stores strings, and "row" if the RDD stores the rows of a DataFrame -#' @param isCached TRUE if the RDD is cached -#' @param isCheckpointed TRUE if the RDD has been checkpointed +# @rdname RDD +# @export +# +# @param jrdd Java object reference to the backing JavaRDD +# @param serializedMode Use "byte" if the RDD stores data serialized in R, "string" if the RDD +# stores strings, and "row" if the RDD stores the rows of a DataFrame +# @param isCached TRUE if the RDD is cached +# @param isCheckpointed TRUE if the RDD has been checkpointed RDD <- function(jrdd, serializedMode = "byte", isCached = FALSE, isCheckpointed = FALSE) { new("RDD", jrdd, serializedMode, isCached, isCheckpointed) @@ -200,19 +200,19 @@ setValidity("RDD", ############ Actions and Transformations ############ -#' Persist an RDD -#' -#' Persist this RDD with the default storage level (MEMORY_ONLY). -#' -#' @param x The RDD to cache -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' cache(rdd) -#'} -#' @rdname cache-methods -#' @aliases cache,RDD-method +# Persist an RDD +# +# Persist this RDD with the default storage level (MEMORY_ONLY). +# +# @param x The RDD to cache +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# cache(rdd) +#} +# @rdname cache-methods +# @aliases cache,RDD-method setMethod("cache", signature(x = "RDD"), function(x) { @@ -221,22 +221,22 @@ setMethod("cache", x }) -#' Persist an RDD -#' -#' Persist this RDD with the specified storage level. For details of the -#' supported storage levels, refer to -#' http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence. -#' -#' @param x The RDD to persist -#' @param newLevel The new storage level to be assigned -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' persist(rdd, "MEMORY_AND_DISK") -#'} -#' @rdname persist -#' @aliases persist,RDD-method +# Persist an RDD +# +# Persist this RDD with the specified storage level. For details of the +# supported storage levels, refer to +# http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence. 
+# +# @param x The RDD to persist +# @param newLevel The new storage level to be assigned +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# persist(rdd, "MEMORY_AND_DISK") +#} +# @rdname persist +# @aliases persist,RDD-method setMethod("persist", signature(x = "RDD", newLevel = "character"), function(x, newLevel) { @@ -245,21 +245,21 @@ setMethod("persist", x }) -#' Unpersist an RDD -#' -#' Mark the RDD as non-persistent, and remove all blocks for it from memory and -#' disk. -#' -#' @param x The RDD to unpersist -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' cache(rdd) # rdd@@env$isCached == TRUE -#' unpersist(rdd) # rdd@@env$isCached == FALSE -#'} -#' @rdname unpersist-methods -#' @aliases unpersist,RDD-method +# Unpersist an RDD +# +# Mark the RDD as non-persistent, and remove all blocks for it from memory and +# disk. +# +# @param x The RDD to unpersist +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# cache(rdd) # rdd@@env$isCached == TRUE +# unpersist(rdd) # rdd@@env$isCached == FALSE +#} +# @rdname unpersist-methods +# @aliases unpersist,RDD-method setMethod("unpersist", signature(x = "RDD"), function(x) { @@ -268,24 +268,24 @@ setMethod("unpersist", x }) -#' Checkpoint an RDD -#' -#' Mark this RDD for checkpointing. It will be saved to a file inside the -#' checkpoint directory set with setCheckpointDir() and all references to its -#' parent RDDs will be removed. This function must be called before any job has -#' been executed on this RDD. It is strongly recommended that this RDD is -#' persisted in memory, otherwise saving it on a file will require recomputation. -#' -#' @param x The RDD to checkpoint -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' setCheckpointDir(sc, "checkpoint") -#' rdd <- parallelize(sc, 1:10, 2L) -#' checkpoint(rdd) -#'} -#' @rdname checkpoint-methods -#' @aliases checkpoint,RDD-method +# Checkpoint an RDD +# +# Mark this RDD for checkpointing. It will be saved to a file inside the +# checkpoint directory set with setCheckpointDir() and all references to its +# parent RDDs will be removed. This function must be called before any job has +# been executed on this RDD. It is strongly recommended that this RDD is +# persisted in memory, otherwise saving it on a file will require recomputation. +# +# @param x The RDD to checkpoint +# @examples +#\dontrun{ +# sc <- sparkR.init() +# setCheckpointDir(sc, "checkpoint") +# rdd <- parallelize(sc, 1:10, 2L) +# checkpoint(rdd) +#} +# @rdname checkpoint-methods +# @aliases checkpoint,RDD-method setMethod("checkpoint", signature(x = "RDD"), function(x) { @@ -295,18 +295,18 @@ setMethod("checkpoint", x }) -#' Gets the number of partitions of an RDD -#' -#' @param x A RDD. -#' @return the number of partitions of rdd as an integer. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' numPartitions(rdd) # 2L -#'} -#' @rdname numPartitions -#' @aliases numPartitions,RDD-method +# Gets the number of partitions of an RDD +# +# @param x A RDD. +# @return the number of partitions of rdd as an integer. 
+# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# numPartitions(rdd) # 2L +#} +# @rdname numPartitions +# @aliases numPartitions,RDD-method setMethod("numPartitions", signature(x = "RDD"), function(x) { @@ -315,24 +315,24 @@ setMethod("numPartitions", callJMethod(partitions, "size") }) -#' Collect elements of an RDD -#' -#' @description -#' \code{collect} returns a list that contains all of the elements in this RDD. -#' -#' @param x The RDD to collect -#' @param ... Other optional arguments to collect -#' @param flatten FALSE if the list should not flattened -#' @return a list containing elements in the RDD -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' collect(rdd) # list from 1 to 10 -#' collectPartition(rdd, 0L) # list from 1 to 5 -#'} -#' @rdname collect-methods -#' @aliases collect,RDD-method +# Collect elements of an RDD +# +# @description +# \code{collect} returns a list that contains all of the elements in this RDD. +# +# @param x The RDD to collect +# @param ... Other optional arguments to collect +# @param flatten FALSE if the list should not flattened +# @return a list containing elements in the RDD +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# collect(rdd) # list from 1 to 10 +# collectPartition(rdd, 0L) # list from 1 to 5 +#} +# @rdname collect-methods +# @aliases collect,RDD-method setMethod("collect", signature(x = "RDD"), function(x, flatten = TRUE) { @@ -343,12 +343,12 @@ setMethod("collect", }) -#' @description -#' \code{collectPartition} returns a list that contains all of the elements -#' in the specified partition of the RDD. -#' @param partitionId the partition to collect (starts from 0) -#' @rdname collect-methods -#' @aliases collectPartition,integer,RDD-method +# @description +# \code{collectPartition} returns a list that contains all of the elements +# in the specified partition of the RDD. +# @param partitionId the partition to collect (starts from 0) +# @rdname collect-methods +# @aliases collectPartition,integer,RDD-method setMethod("collectPartition", signature(x = "RDD", partitionId = "integer"), function(x, partitionId) { @@ -361,17 +361,17 @@ setMethod("collectPartition", serializedMode = getSerializedMode(x)) }) -#' @description -#' \code{collectAsMap} returns a named list as a map that contains all of the elements -#' in a key-value pair RDD. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4)), 2L) -#' collectAsMap(rdd) # list(`1` = 2, `3` = 4) -#'} -#' @rdname collect-methods -#' @aliases collectAsMap,RDD-method +# @description +# \code{collectAsMap} returns a named list as a map that contains all of the elements +# in a key-value pair RDD. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 2), list(3, 4)), 2L) +# collectAsMap(rdd) # list(`1` = 2, `3` = 4) +#} +# @rdname collect-methods +# @aliases collectAsMap,RDD-method setMethod("collectAsMap", signature(x = "RDD"), function(x) { @@ -381,19 +381,19 @@ setMethod("collectAsMap", as.list(map) }) -#' Return the number of elements in the RDD. -#' -#' @param x The RDD to count -#' @return number of elements in the RDD. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' count(rdd) # 10 -#' length(rdd) # Same as count -#'} -#' @rdname count -#' @aliases count,RDD-method +# Return the number of elements in the RDD. 
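The collection and counting actions documented here behave like their PySpark namesakes; a compact PySpark sketch, assuming a live `sc`:

    rdd = sc.parallelize([1, 2, 3, 2, 1], 2)
    rdd.getNumPartitions()    # 2
    rdd.collect()             # [1, 2, 3, 2, 1]
    rdd.count()               # 5
    dict(rdd.countByValue())  # {1: 2, 2: 2, 3: 1}
    pairs = sc.parallelize([(1, 2), (3, 4)])
    pairs.collectAsMap()      # {1: 2, 3: 4}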
+# +# @param x The RDD to count +# @return number of elements in the RDD. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# count(rdd) # 10 +# length(rdd) # Same as count +#} +# @rdname count +# @aliases count,RDD-method setMethod("count", signature(x = "RDD"), function(x) { @@ -405,31 +405,31 @@ setMethod("count", sum(as.integer(vals)) }) -#' Return the number of elements in the RDD -#' @export -#' @rdname count +# Return the number of elements in the RDD +# @export +# @rdname count setMethod("length", signature(x = "RDD"), function(x) { count(x) }) -#' Return the count of each unique value in this RDD as a list of -#' (value, count) pairs. -#' -#' Same as countByValue in Spark. -#' -#' @param x The RDD to count -#' @return list of (value, count) pairs, where count is number of each unique -#' value in rdd. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, c(1,2,3,2,1)) -#' countByValue(rdd) # (1,2L), (2,2L), (3,1L) -#'} -#' @rdname countByValue -#' @aliases countByValue,RDD-method +# Return the count of each unique value in this RDD as a list of +# (value, count) pairs. +# +# Same as countByValue in Spark. +# +# @param x The RDD to count +# @return list of (value, count) pairs, where count is number of each unique +# value in rdd. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, c(1,2,3,2,1)) +# countByValue(rdd) # (1,2L), (2,2L), (3,1L) +#} +# @rdname countByValue +# @aliases countByValue,RDD-method setMethod("countByValue", signature(x = "RDD"), function(x) { @@ -437,23 +437,23 @@ setMethod("countByValue", collect(reduceByKey(ones, `+`, numPartitions(x))) }) -#' Apply a function to all elements -#' -#' This function creates a new RDD by applying the given transformation to all -#' elements of the given RDD -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each element -#' @return a new RDD created by the transformation. -#' @rdname lapply -#' @aliases lapply -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' multiplyByTwo <- lapply(rdd, function(x) { x * 2 }) -#' collect(multiplyByTwo) # 2,4,6... -#'} +# Apply a function to all elements +# +# This function creates a new RDD by applying the given transformation to all +# elements of the given RDD +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each element +# @return a new RDD created by the transformation. +# @rdname lapply +# @aliases lapply +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# multiplyByTwo <- lapply(rdd, function(x) { x * 2 }) +# collect(multiplyByTwo) # 2,4,6... +#} setMethod("lapply", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -463,31 +463,31 @@ setMethod("lapply", lapplyPartitionsWithIndex(X, func) }) -#' @rdname lapply -#' @aliases map,RDD,function-method +# @rdname lapply +# @aliases map,RDD,function-method setMethod("map", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapply(X, FUN) }) -#' Flatten results after apply a function to all elements -#' -#' This function return a new RDD by first applying a function to all -#' elements of this RDD, and then flattening the results. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each element -#' @return a new RDD created by the transformation. 
-#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' multiplyByTwo <- flatMap(rdd, function(x) { list(x*2, x*10) }) -#' collect(multiplyByTwo) # 2,20,4,40,6,60... -#'} -#' @rdname flatMap -#' @aliases flatMap,RDD,function-method +# Flatten results after apply a function to all elements +# +# This function return a new RDD by first applying a function to all +# elements of this RDD, and then flattening the results. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each element +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# multiplyByTwo <- flatMap(rdd, function(x) { list(x*2, x*10) }) +# collect(multiplyByTwo) # 2,20,4,40,6,60... +#} +# @rdname flatMap +# @aliases flatMap,RDD,function-method setMethod("flatMap", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -500,83 +500,83 @@ setMethod("flatMap", lapplyPartition(X, partitionFunc) }) -#' Apply a function to each partition of an RDD -#' -#' Return a new RDD by applying a function to each partition of this RDD. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each partition. -#' @return a new RDD created by the transformation. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' partitionSum <- lapplyPartition(rdd, function(part) { Reduce("+", part) }) -#' collect(partitionSum) # 15, 40 -#'} -#' @rdname lapplyPartition -#' @aliases lapplyPartition,RDD,function-method +# Apply a function to each partition of an RDD +# +# Return a new RDD by applying a function to each partition of this RDD. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each partition. +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# partitionSum <- lapplyPartition(rdd, function(part) { Reduce("+", part) }) +# collect(partitionSum) # 15, 40 +#} +# @rdname lapplyPartition +# @aliases lapplyPartition,RDD,function-method setMethod("lapplyPartition", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapplyPartitionsWithIndex(X, function(s, part) { FUN(part) }) }) -#' mapPartitions is the same as lapplyPartition. -#' -#' @rdname lapplyPartition -#' @aliases mapPartitions,RDD,function-method +# mapPartitions is the same as lapplyPartition. +# +# @rdname lapplyPartition +# @aliases mapPartitions,RDD,function-method setMethod("mapPartitions", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapplyPartition(X, FUN) }) -#' Return a new RDD by applying a function to each partition of this RDD, while -#' tracking the index of the original partition. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each partition; takes the partition -#' index and a list of elements in the particular partition. -#' @return a new RDD created by the transformation. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 5L) -#' prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) { -#' partIndex * Reduce("+", part) }) -#' collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76 -#'} -#' @rdname lapplyPartitionsWithIndex -#' @aliases lapplyPartitionsWithIndex,RDD,function-method +# Return a new RDD by applying a function to each partition of this RDD, while +# tracking the index of the original partition. 
+# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each partition; takes the partition +# index and a list of elements in the particular partition. +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 5L) +# prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) { +# partIndex * Reduce("+", part) }) +# collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76 +#} +# @rdname lapplyPartitionsWithIndex +# @aliases lapplyPartitionsWithIndex,RDD,function-method setMethod("lapplyPartitionsWithIndex", signature(X = "RDD", FUN = "function"), function(X, FUN) { PipelinedRDD(X, FUN) }) -#' @rdname lapplyPartitionsWithIndex -#' @aliases mapPartitionsWithIndex,RDD,function-method +# @rdname lapplyPartitionsWithIndex +# @aliases mapPartitionsWithIndex,RDD,function-method setMethod("mapPartitionsWithIndex", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapplyPartitionsWithIndex(X, FUN) }) -#' This function returns a new RDD containing only the elements that satisfy -#' a predicate (i.e. returning TRUE in a given logical function). -#' The same as `filter()' in Spark. -#' -#' @param x The RDD to be filtered. -#' @param f A unary predicate function. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2) -#'} -#' @rdname filterRDD -#' @aliases filterRDD,RDD,function-method +# This function returns a new RDD containing only the elements that satisfy +# a predicate (i.e. returning TRUE in a given logical function). +# The same as `filter()' in Spark. +# +# @param x The RDD to be filtered. +# @param f A unary predicate function. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2) +#} +# @rdname filterRDD +# @aliases filterRDD,RDD,function-method setMethod("filterRDD", signature(x = "RDD", f = "function"), function(x, f) { @@ -586,30 +586,30 @@ setMethod("filterRDD", lapplyPartition(x, filter.func) }) -#' @rdname filterRDD -#' @aliases Filter +# @rdname filterRDD +# @aliases Filter setMethod("Filter", signature(f = "function", x = "RDD"), function(f, x) { filterRDD(x, f) }) -#' Reduce across elements of an RDD. -#' -#' This function reduces the elements of this RDD using the -#' specified commutative and associative binary operator. -#' -#' @param x The RDD to reduce -#' @param func Commutative and associative function to apply on elements -#' of the RDD. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' reduce(rdd, "+") # 55 -#'} -#' @rdname reduce -#' @aliases reduce,RDD,ANY-method +# Reduce across elements of an RDD. +# +# This function reduces the elements of this RDD using the +# specified commutative and associative binary operator. +# +# @param x The RDD to reduce +# @param func Commutative and associative function to apply on elements +# of the RDD. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# reduce(rdd, "+") # 55 +#} +# @rdname reduce +# @aliases reduce,RDD,ANY-method setMethod("reduce", signature(x = "RDD", func = "ANY"), function(x, func) { @@ -623,70 +623,70 @@ setMethod("reduce", Reduce(func, partitionList) }) -#' Get the maximum element of an RDD. 
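For comparison, the PySpark equivalents of the element-level and partition-level transformations and reductions documented in this stretch, assuming a live `sc`:

    rdd = sc.parallelize(range(1, 11), 2)   # partitions: [1..5], [6..10]
    rdd.map(lambda x: x * 2).collect()      # [2, 4, 6, ..., 20]
    rdd.filter(lambda x: x < 3).collect()   # [1, 2]
    # The function receives the partition index and an iterator over it.
    rdd.mapPartitionsWithIndex(
        lambda i, part: [i * sum(part)]).collect()   # [0, 40]
    rdd.reduce(lambda a, b: a + b)          # 55
    rdd.max(), rdd.min()                    # (10, 1)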
-#' -#' @param x The RDD to get the maximum element from -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' maximum(rdd) # 10 -#'} -#' @rdname maximum -#' @aliases maximum,RDD +# Get the maximum element of an RDD. +# +# @param x The RDD to get the maximum element from +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# maximum(rdd) # 10 +#} +# @rdname maximum +# @aliases maximum,RDD setMethod("maximum", signature(x = "RDD"), function(x) { reduce(x, max) }) -#' Get the minimum element of an RDD. -#' -#' @param x The RDD to get the minimum element from -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' minimum(rdd) # 1 -#'} -#' @rdname minimum -#' @aliases minimum,RDD +# Get the minimum element of an RDD. +# +# @param x The RDD to get the minimum element from +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# minimum(rdd) # 1 +#} +# @rdname minimum +# @aliases minimum,RDD setMethod("minimum", signature(x = "RDD"), function(x) { reduce(x, min) }) -#' Add up the elements in an RDD. -#' -#' @param x The RDD to add up the elements in -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' sumRDD(rdd) # 55 -#'} -#' @rdname sumRDD -#' @aliases sumRDD,RDD +# Add up the elements in an RDD. +# +# @param x The RDD to add up the elements in +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# sumRDD(rdd) # 55 +#} +# @rdname sumRDD +# @aliases sumRDD,RDD setMethod("sumRDD", signature(x = "RDD"), function(x) { reduce(x, "+") }) -#' Applies a function to all elements in an RDD, and force evaluation. -#' -#' @param x The RDD to apply the function -#' @param func The function to be applied. -#' @return invisible NULL. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' foreach(rdd, function(x) { save(x, file=...) }) -#'} -#' @rdname foreach -#' @aliases foreach,RDD,function-method +# Applies a function to all elements in an RDD, and force evaluation. +# +# @param x The RDD to apply the function +# @param func The function to be applied. +# @return invisible NULL. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# foreach(rdd, function(x) { save(x, file=...) }) +#} +# @rdname foreach +# @aliases foreach,RDD,function-method setMethod("foreach", signature(x = "RDD", func = "function"), function(x, func) { @@ -697,37 +697,37 @@ setMethod("foreach", invisible(collect(mapPartitions(x, partition.func))) }) -#' Applies a function to each partition in an RDD, and force evaluation. -#' -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' foreachPartition(rdd, function(part) { save(part, file=...); NULL }) -#'} -#' @rdname foreach -#' @aliases foreachPartition,RDD,function-method +# Applies a function to each partition in an RDD, and force evaluation. +# +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# foreachPartition(rdd, function(part) { save(part, file=...); NULL }) +#} +# @rdname foreach +# @aliases foreachPartition,RDD,function-method setMethod("foreachPartition", signature(x = "RDD", func = "function"), function(x, func) { invisible(collect(mapPartitions(x, func))) }) -#' Take elements from an RDD. -#' -#' This function takes the first NUM elements in the RDD and -#' returns them in a list. 
-#' -#' @param x The RDD to take elements from -#' @param num Number of elements to take -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' take(rdd, 2L) # list(1, 2) -#'} -#' @rdname take -#' @aliases take,RDD,numeric-method +# Take elements from an RDD. +# +# This function takes the first NUM elements in the RDD and +# returns them in a list. +# +# @param x The RDD to take elements from +# @param num Number of elements to take +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# take(rdd, 2L) # list(1, 2) +#} +# @rdname take +# @aliases take,RDD,numeric-method setMethod("take", signature(x = "RDD", num = "numeric"), function(x, num) { @@ -762,39 +762,39 @@ setMethod("take", }) -#' First -#' -#' Return the first element of an RDD -#' -#' @rdname first -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' first(rdd) -#' } +# First +# +# Return the first element of an RDD +# +# @rdname first +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# first(rdd) +# } setMethod("first", signature(x = "RDD"), function(x) { take(x, 1)[[1]] }) -#' Removes the duplicates from RDD. -#' -#' This function returns a new RDD containing the distinct elements in the -#' given RDD. The same as `distinct()' in Spark. -#' -#' @param x The RDD to remove duplicates from. -#' @param numPartitions Number of partitions to create. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, c(1,2,2,3,3,3)) -#' sort(unlist(collect(distinct(rdd)))) # c(1, 2, 3) -#'} -#' @rdname distinct -#' @aliases distinct,RDD-method +# Removes the duplicates from RDD. +# +# This function returns a new RDD containing the distinct elements in the +# given RDD. The same as `distinct()' in Spark. +# +# @param x The RDD to remove duplicates from. +# @param numPartitions Number of partitions to create. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, c(1,2,2,3,3,3)) +# sort(unlist(collect(distinct(rdd)))) # c(1, 2, 3) +#} +# @rdname distinct +# @aliases distinct,RDD-method setMethod("distinct", signature(x = "RDD"), function(x, numPartitions = SparkR:::numPartitions(x)) { @@ -806,24 +806,24 @@ setMethod("distinct", resRDD }) -#' Return an RDD that is a sampled subset of the given RDD. -#' -#' The same as `sample()' in Spark. (We rename it due to signature -#' inconsistencies with the `sample()' function in R's base package.) -#' -#' @param x The RDD to sample elements from -#' @param withReplacement Sampling with replacement or not -#' @param fraction The (rough) sample target fraction -#' @param seed Randomness seed value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements -#' collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates -#'} -#' @rdname sampleRDD -#' @aliases sampleRDD,RDD +# Return an RDD that is a sampled subset of the given RDD. +# +# The same as `sample()' in Spark. (We rename it due to signature +# inconsistencies with the `sample()' function in R's base package.) 
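The sampling and dedup actions here also track the PySpark API; a sketch, assuming a live `sc` (seeds are arbitrary):

    rdd = sc.parallelize([1, 2, 2, 3, 3, 3])
    rdd.take(2)                        # [1, 2]
    rdd.first()                        # 1
    sorted(rdd.distinct().collect())   # [1, 2, 3]
    # sample() targets a rough fraction, so its size varies run to run;
    # takeSample() returns exactly `num` elements.
    rdd.sample(False, 0.5, 42).collect()
    rdd.takeSample(False, 3, 42)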
+# +# @param x The RDD to sample elements from +# @param withReplacement Sampling with replacement or not +# @param fraction The (rough) sample target fraction +# @param seed Randomness seed value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements +# collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates +#} +# @rdname sampleRDD +# @aliases sampleRDD,RDD setMethod("sampleRDD", signature(x = "RDD", withReplacement = "logical", fraction = "numeric", seed = "integer"), @@ -867,23 +867,23 @@ setMethod("sampleRDD", lapplyPartitionsWithIndex(x, samplingFunc) }) -#' Return a list of the elements that are a sampled subset of the given RDD. -#' -#' @param x The RDD to sample elements from -#' @param withReplacement Sampling with replacement or not -#' @param num Number of elements to return -#' @param seed Randomness seed value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:100) -#' # exactly 5 elements sampled, which may not be distinct -#' takeSample(rdd, TRUE, 5L, 1618L) -#' # exactly 5 distinct elements sampled -#' takeSample(rdd, FALSE, 5L, 16181618L) -#'} -#' @rdname takeSample -#' @aliases takeSample,RDD +# Return a list of the elements that are a sampled subset of the given RDD. +# +# @param x The RDD to sample elements from +# @param withReplacement Sampling with replacement or not +# @param num Number of elements to return +# @param seed Randomness seed value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:100) +# # exactly 5 elements sampled, which may not be distinct +# takeSample(rdd, TRUE, 5L, 1618L) +# # exactly 5 distinct elements sampled +# takeSample(rdd, FALSE, 5L, 16181618L) +#} +# @rdname takeSample +# @aliases takeSample,RDD setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", num = "integer", seed = "integer"), function(x, withReplacement, num, seed) { @@ -930,18 +930,18 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", sample(samples)[1:total] }) -#' Creates tuples of the elements in this RDD by applying a function. -#' -#' @param x The RDD. -#' @param func The function to be applied. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3)) -#' collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3)) -#'} -#' @rdname keyBy -#' @aliases keyBy,RDD +# Creates tuples of the elements in this RDD by applying a function. +# +# @param x The RDD. +# @param func The function to be applied. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3)) +# collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3)) +#} +# @rdname keyBy +# @aliases keyBy,RDD setMethod("keyBy", signature(x = "RDD", func = "function"), function(x, func) { @@ -951,44 +951,44 @@ setMethod("keyBy", lapply(x, apply.func) }) -#' Return a new RDD that has exactly numPartitions partitions. -#' Can increase or decrease the level of parallelism in this RDD. Internally, -#' this uses a shuffle to redistribute data. -#' If you are decreasing the number of partitions in this RDD, consider using -#' coalesce, which can avoid performing a shuffle. -#' -#' @param x The RDD. -#' @param numPartitions Number of partitions to create. 
-#' @seealso coalesce -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L) -#' numPartitions(rdd) # 4 -#' numPartitions(repartition(rdd, 2L)) # 2 -#'} -#' @rdname repartition -#' @aliases repartition,RDD +# Return a new RDD that has exactly numPartitions partitions. +# Can increase or decrease the level of parallelism in this RDD. Internally, +# this uses a shuffle to redistribute data. +# If you are decreasing the number of partitions in this RDD, consider using +# coalesce, which can avoid performing a shuffle. +# +# @param x The RDD. +# @param numPartitions Number of partitions to create. +# @seealso coalesce +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L) +# numPartitions(rdd) # 4 +# numPartitions(repartition(rdd, 2L)) # 2 +#} +# @rdname repartition +# @aliases repartition,RDD setMethod("repartition", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions) { coalesce(x, numPartitions, TRUE) }) -#' Return a new RDD that is reduced into numPartitions partitions. -#' -#' @param x The RDD. -#' @param numPartitions Number of partitions to create. -#' @seealso repartition -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5), 3L) -#' numPartitions(rdd) # 3 -#' numPartitions(coalesce(rdd, 1L)) # 1 -#'} -#' @rdname coalesce -#' @aliases coalesce,RDD +# Return a new RDD that is reduced into numPartitions partitions. +# +# @param x The RDD. +# @param numPartitions Number of partitions to create. +# @seealso repartition +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4, 5), 3L) +# numPartitions(rdd) # 3 +# numPartitions(coalesce(rdd, 1L)) # 1 +#} +# @rdname coalesce +# @aliases coalesce,RDD setMethod("coalesce", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions, shuffle = FALSE) { @@ -1012,19 +1012,19 @@ setMethod("coalesce", } }) -#' Save this RDD as a SequenceFile of serialized objects. -#' -#' @param x The RDD to save -#' @param path The directory where the file is saved -#' @seealso objectFile -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3) -#' saveAsObjectFile(rdd, "/tmp/sparkR-tmp") -#'} -#' @rdname saveAsObjectFile -#' @aliases saveAsObjectFile,RDD +# Save this RDD as a SequenceFile of serialized objects. +# +# @param x The RDD to save +# @param path The directory where the file is saved +# @seealso objectFile +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3) +# saveAsObjectFile(rdd, "/tmp/sparkR-tmp") +#} +# @rdname saveAsObjectFile +# @aliases saveAsObjectFile,RDD setMethod("saveAsObjectFile", signature(x = "RDD", path = "character"), function(x, path) { @@ -1037,18 +1037,18 @@ setMethod("saveAsObjectFile", invisible(callJMethod(getJRDD(x), "saveAsObjectFile", path)) }) -#' Save this RDD as a text file, using string representations of elements. -#' -#' @param x The RDD to save -#' @param path The directory where the partitions of the text file are saved -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3) -#' saveAsTextFile(rdd, "/tmp/sparkR-tmp") -#'} -#' @rdname saveAsTextFile -#' @aliases saveAsTextFile,RDD +# Save this RDD as a text file, using string representations of elements. 
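As with the SparkR docs above, repartition shuffles while coalesce by default does not; the PySpark counterpart, assuming a live `sc` (the output path is illustrative):

    rdd = sc.parallelize(range(7), 4)
    rdd.getNumPartitions()                  # 4
    rdd.repartition(2).getNumPartitions()   # 2, via a full shuffle
    rdd.coalesce(1).getNumPartitions()      # 1, avoiding a shuffle
    rdd.saveAsTextFile("/tmp/sparkR-tmp")   # one part file per partition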
+# +# @param x The RDD to save +# @param path The directory where the partitions of the text file are saved +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3) +# saveAsTextFile(rdd, "/tmp/sparkR-tmp") +#} +# @rdname saveAsTextFile +# @aliases saveAsTextFile,RDD setMethod("saveAsTextFile", signature(x = "RDD", path = "character"), function(x, path) { @@ -1061,21 +1061,21 @@ setMethod("saveAsTextFile", callJMethod(getJRDD(stringRdd, serializedMode = "string"), "saveAsTextFile", path)) }) -#' Sort an RDD by the given key function. -#' -#' @param x An RDD to be sorted. -#' @param func A function used to compute the sort key for each element. -#' @param ascending A flag to indicate whether the sorting is ascending or descending. -#' @param numPartitions Number of partitions to create. -#' @return An RDD where all elements are sorted. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(3, 2, 1)) -#' collect(sortBy(rdd, function(x) { x })) # list (1, 2, 3) -#'} -#' @rdname sortBy -#' @aliases sortBy,RDD,RDD-method +# Sort an RDD by the given key function. +# +# @param x An RDD to be sorted. +# @param func A function used to compute the sort key for each element. +# @param ascending A flag to indicate whether the sorting is ascending or descending. +# @param numPartitions Number of partitions to create. +# @return An RDD where all elements are sorted. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(3, 2, 1)) +# collect(sortBy(rdd, function(x) { x })) # list (1, 2, 3) +#} +# @rdname sortBy +# @aliases sortBy,RDD,RDD-method setMethod("sortBy", signature(x = "RDD", func = "function"), function(x, func, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) { @@ -1137,97 +1137,97 @@ takeOrderedElem <- function(x, num, ascending = TRUE) { resList } -#' Returns the first N elements from an RDD in ascending order. -#' -#' @param x An RDD. -#' @param num Number of elements to return. -#' @return The first N elements from the RDD in ascending order. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) -#' takeOrdered(rdd, 6L) # list(1, 2, 3, 4, 5, 6) -#'} -#' @rdname takeOrdered -#' @aliases takeOrdered,RDD,RDD-method +# Returns the first N elements from an RDD in ascending order. +# +# @param x An RDD. +# @param num Number of elements to return. +# @return The first N elements from the RDD in ascending order. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) +# takeOrdered(rdd, 6L) # list(1, 2, 3, 4, 5, 6) +#} +# @rdname takeOrdered +# @aliases takeOrdered,RDD,RDD-method setMethod("takeOrdered", signature(x = "RDD", num = "integer"), function(x, num) { takeOrderedElem(x, num) }) -#' Returns the top N elements from an RDD. -#' -#' @param x An RDD. -#' @param num Number of elements to return. -#' @return The top N elements from the RDD. -#' @rdname top -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) -#' top(rdd, 6L) # list(10, 9, 7, 6, 5, 4) -#'} -#' @rdname top -#' @aliases top,RDD,RDD-method +# Returns the top N elements from an RDD. +# +# @param x An RDD. +# @param num Number of elements to return. +# @return The top N elements from the RDD. 
+# @rdname top +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) +# top(rdd, 6L) # list(10, 9, 7, 6, 5, 4) +#} +# @rdname top +# @aliases top,RDD,RDD-method setMethod("top", signature(x = "RDD", num = "integer"), function(x, num) { takeOrderedElem(x, num, FALSE) }) -#' Fold an RDD using a given associative function and a neutral "zero value". -#' -#' Aggregate the elements of each partition, and then the results for all the -#' partitions, using a given associative function and a neutral "zero value". -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param op An associative function for the folding operation. -#' @return The folding result. -#' @rdname fold -#' @seealso reduce -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5)) -#' fold(rdd, 0, "+") # 15 -#'} -#' @rdname fold -#' @aliases fold,RDD,RDD-method +# Fold an RDD using a given associative function and a neutral "zero value". +# +# Aggregate the elements of each partition, and then the results for all the +# partitions, using a given associative function and a neutral "zero value". +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param op An associative function for the folding operation. +# @return The folding result. +# @rdname fold +# @seealso reduce +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4, 5)) +# fold(rdd, 0, "+") # 15 +#} +# @rdname fold +# @aliases fold,RDD,RDD-method setMethod("fold", signature(x = "RDD", zeroValue = "ANY", op = "ANY"), function(x, zeroValue, op) { aggregateRDD(x, zeroValue, op, op) }) -#' Aggregate an RDD using the given combine functions and a neutral "zero value". -#' -#' Aggregate the elements of each partition, and then the results for all the -#' partitions, using given combine functions and a neutral "zero value". -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param seqOp A function to aggregate the RDD elements. It may return a different -#' result type from the type of the RDD elements. -#' @param combOp A function to aggregate results of seqOp. -#' @return The aggregation result. -#' @rdname aggregateRDD -#' @seealso reduce -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4)) -#' zeroValue <- list(0, 0) -#' seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } -#' combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } -#' aggregateRDD(rdd, zeroValue, seqOp, combOp) # list(10, 4) -#'} -#' @rdname aggregateRDD -#' @aliases aggregateRDD,RDD,RDD-method +# Aggregate an RDD using the given combine functions and a neutral "zero value". +# +# Aggregate the elements of each partition, and then the results for all the +# partitions, using given combine functions and a neutral "zero value". +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param seqOp A function to aggregate the RDD elements. It may return a different +# result type from the type of the RDD elements. +# @param combOp A function to aggregate results of seqOp. +# @return The aggregation result. 
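The ordering and aggregation actions documented here follow the same contracts in PySpark; a sketch, assuming a live `sc`:

    rdd = sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7])
    rdd.takeOrdered(6)   # [1, 2, 3, 4, 5, 6]
    rdd.top(6)           # [10, 9, 7, 6, 5, 4]
    nums = sc.parallelize([1, 2, 3, 4])
    nums.fold(0, lambda a, b: a + b)   # 10
    # aggregate() threads a (sum, count) pair through both phases, so the
    # result type can differ from the element type.
    nums.aggregate((0, 0),
                   lambda acc, x: (acc[0] + x, acc[1] + 1),
                   lambda a, b: (a[0] + b[0], a[1] + b[1]))   # (10, 4)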
+# @rdname aggregateRDD
+# @seealso reduce
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(1, 2, 3, 4))
+# zeroValue <- list(0, 0)
+# seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) }
+# combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) }
+# aggregateRDD(rdd, zeroValue, seqOp, combOp) # list(10, 4)
+#}
+# @rdname aggregateRDD
+# @aliases aggregateRDD,RDD,RDD-method
 setMethod("aggregateRDD",
           signature(x = "RDD", zeroValue = "ANY", seqOp = "ANY", combOp = "ANY"),
           function(x, zeroValue, seqOp, combOp) {
@@ -1240,25 +1240,25 @@ setMethod("aggregateRDD",
             Reduce(combOp, partitionList, zeroValue)
           })

-#' Pipes elements to a forked external process.
-#'
-#' The same as 'pipe()' in Spark.
-#'
-#' @param x The RDD whose elements are piped to the forked external process.
-#' @param command The command to fork an external process.
-#' @param env A named list to set environment variables of the external process.
-#' @return A new RDD created by piping all elements to a forked external process.
-#' @rdname pipeRDD
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' collect(pipeRDD(rdd, "more")
-#' Output: c("1", "2", ..., "10")
-#'}
-#' @rdname pipeRDD
-#' @aliases pipeRDD,RDD,character-method
+# Pipes elements to a forked external process.
+#
+# The same as 'pipe()' in Spark.
+#
+# @param x The RDD whose elements are piped to the forked external process.
+# @param command The command to fork an external process.
+# @param env A named list to set environment variables of the external process.
+# @return A new RDD created by piping all elements to a forked external process.
+# @rdname pipeRDD
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# collect(pipeRDD(rdd, "more"))
+# Output: c("1", "2", ..., "10")
+#}
+# @rdname pipeRDD
+# @aliases pipeRDD,RDD,character-method
 setMethod("pipeRDD",
           signature(x = "RDD", command = "character"),
           function(x, command, env = list()) {
@@ -1274,41 +1274,41 @@ setMethod("pipeRDD",
           })

 # TODO: Consider caching the name in the RDD's environment
-#' Return an RDD's name.
-#'
-#' @param x The RDD whose name is returned.
-#' @rdname name
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(1,2,3))
-#' name(rdd) # NULL (if not set before)
-#'}
-#' @rdname name
-#' @aliases name,RDD
+# Return an RDD's name.
+#
+# @param x The RDD whose name is returned.
+# @rdname name
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(1,2,3))
+# name(rdd) # NULL (if not set before)
+#}
+# @rdname name
+# @aliases name,RDD
 setMethod("name",
           signature(x = "RDD"),
           function(x) {
            callJMethod(getJRDD(x), "name")
           })

-#' Set an RDD's name.
-#'
-#' @param x The RDD whose name is to be set.
-#' @param name The RDD name to be set.
-#' @return a new RDD renamed.
-#' @rdname setName
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(1,2,3))
-#' setName(rdd, "myRDD")
-#' name(rdd) # "myRDD"
-#'}
-#' @rdname setName
-#' @aliases setName,RDD
+# Set an RDD's name.
+#
+# @param x The RDD whose name is to be set.
+# @param name The RDD name to be set.
+# @return A new RDD with the name set.
+# @rdname setName +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1,2,3)) +# setName(rdd, "myRDD") +# name(rdd) # "myRDD" +#} +# @rdname setName +# @aliases setName,RDD setMethod("setName", signature(x = "RDD", name = "character"), function(x, name) { @@ -1316,25 +1316,25 @@ setMethod("setName", x }) -#' Zip an RDD with generated unique Long IDs. -#' -#' Items in the kth partition will get ids k, n+k, 2*n+k, ..., where -#' n is the number of partitions. So there may exist gaps, but this -#' method won't trigger a spark job, which is different from -#' zipWithIndex. -#' -#' @param x An RDD to be zipped. -#' @return An RDD with zipped items. -#' @seealso zipWithIndex -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) -#' collect(zipWithUniqueId(rdd)) -#' # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2)) -#'} -#' @rdname zipWithUniqueId -#' @aliases zipWithUniqueId,RDD +# Zip an RDD with generated unique Long IDs. +# +# Items in the kth partition will get ids k, n+k, 2*n+k, ..., where +# n is the number of partitions. So there may exist gaps, but this +# method won't trigger a spark job, which is different from +# zipWithIndex. +# +# @param x An RDD to be zipped. +# @return An RDD with zipped items. +# @seealso zipWithIndex +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) +# collect(zipWithUniqueId(rdd)) +# # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2)) +#} +# @rdname zipWithUniqueId +# @aliases zipWithUniqueId,RDD setMethod("zipWithUniqueId", signature(x = "RDD"), function(x) { @@ -1353,28 +1353,28 @@ setMethod("zipWithUniqueId", lapplyPartitionsWithIndex(x, partitionFunc) }) -#' Zip an RDD with its element indices. -#' -#' The ordering is first based on the partition index and then the -#' ordering of items within each partition. So the first item in -#' the first partition gets index 0, and the last item in the last -#' partition receives the largest index. -#' -#' This method needs to trigger a Spark job when this RDD contains -#' more than one partition. -#' -#' @param x An RDD to be zipped. -#' @return An RDD with zipped items. -#' @seealso zipWithUniqueId -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) -#' collect(zipWithIndex(rdd)) -#' # list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4)) -#'} -#' @rdname zipWithIndex -#' @aliases zipWithIndex,RDD +# Zip an RDD with its element indices. +# +# The ordering is first based on the partition index and then the +# ordering of items within each partition. So the first item in +# the first partition gets index 0, and the last item in the last +# partition receives the largest index. +# +# This method needs to trigger a Spark job when this RDD contains +# more than one partition. +# +# @param x An RDD to be zipped. +# @return An RDD with zipped items. 
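The id arithmetic in zipWithUniqueId() above can be checked by hand: with n partitions, the jth item of partition k receives id j * n + k. A worked sketch matching the documented example:

sc <- sparkR.init()
rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)  # n = 3
# partition 0: "a", "b" -> ids 0, 3; partition 1: "c", "d" -> ids 1, 4;
# partition 2: "e" -> id 2; id 5 is never used, hence the possible gaps
collect(zipWithUniqueId(rdd))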
+# @seealso zipWithUniqueId
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)
+# collect(zipWithIndex(rdd))
+# # list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4))
+#}
+# @rdname zipWithIndex
+# @aliases zipWithIndex,RDD
 setMethod("zipWithIndex",
           signature(x = "RDD"),
           function(x) {
@@ -1406,20 +1406,20 @@ setMethod("zipWithIndex",
             lapplyPartitionsWithIndex(x, partitionFunc)
           })

-#' Coalesce all elements within each partition of an RDD into a list.
-#'
-#' @param x An RDD.
-#' @return An RDD created by coalescing all elements within
-#' each partition into a list.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, as.list(1:4), 2L)
-#' collect(glom(rdd))
-#' # list(list(1, 2), list(3, 4))
-#'}
-#' @rdname glom
-#' @aliases glom,RDD
+# Coalesce all elements within each partition of an RDD into a list.
+#
+# @param x An RDD.
+# @return An RDD created by coalescing all elements within
+# each partition into a list.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, as.list(1:4), 2L)
+# collect(glom(rdd))
+# # list(list(1, 2), list(3, 4))
+#}
+# @rdname glom
+# @aliases glom,RDD
 setMethod("glom",
           signature(x = "RDD"),
           function(x) {
@@ -1432,21 +1432,21 @@ setMethod("glom",

 ############ Binary Functions #############

-#' Return the union RDD of two RDDs.
-#' The same as union() in Spark.
-#'
-#' @param x An RDD.
-#' @param y An RDD.
-#' @return a new RDD created by performing the simple union (witout removing
-#' duplicates) of two input RDDs.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:3)
-#' unionRDD(rdd, rdd) # 1, 2, 3, 1, 2, 3
-#'}
-#' @rdname unionRDD
-#' @aliases unionRDD,RDD,RDD-method
+# Return the union RDD of two RDDs.
+# The same as union() in Spark.
+#
+# @param x An RDD.
+# @param y An RDD.
+# @return a new RDD created by performing the simple union (without removing
+# duplicates) of two input RDDs.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:3)
+# unionRDD(rdd, rdd) # 1, 2, 3, 1, 2, 3
+#}
+# @rdname unionRDD
+# @aliases unionRDD,RDD,RDD-method
 setMethod("unionRDD",
           signature(x = "RDD", y = "RDD"),
           function(x, y) {
@@ -1463,27 +1463,27 @@ setMethod("unionRDD",
             union.rdd
           })

-#' Zip an RDD with another RDD.
-#'
-#' Zips this RDD with another one, returning key-value pairs with the
-#' first element in each RDD second element in each RDD, etc. Assumes
-#' that the two RDDs have the same number of partitions and the same
-#' number of elements in each partition (e.g. one was made through
-#' a map on the other).
-#'
-#' @param x An RDD to be zipped.
-#' @param other Another RDD to be zipped.
-#' @return An RDD zipped from the two RDDs.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, 0:4)
-#' rdd2 <- parallelize(sc, 1000:1004)
-#' collect(zipRDD(rdd1, rdd2))
-#' # list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004))
-#'}
-#' @rdname zipRDD
-#' @aliases zipRDD,RDD
+# Zip an RDD with another RDD.
+#
+# Zips this RDD with another one, returning key-value pairs with the
+# first element in each RDD, second element in each RDD, etc. Assumes
+# that the two RDDs have the same number of partitions and the same
+# number of elements in each partition (e.g. one was made through
+# a map on the other).
+#
+# @param x An RDD to be zipped.
+# @param other Another RDD to be zipped.
+# @return An RDD zipped from the two RDDs.
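Because zipRDD() requires the same number of partitions and the same number of elements per partition, the safest way to build a zippable pair is to derive one RDD from the other, e.g. via lapply. A minimal sketch assuming the usual session:

sc <- sparkR.init()
rdd <- parallelize(sc, 1:5, 2L)
squares <- lapply(rdd, function(x) { x * x })  # inherits rdd's partitioning
collect(zipRDD(rdd, squares))
# list(list(1, 1), list(2, 4), list(3, 9), list(4, 16), list(5, 25))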
+# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, 0:4) +# rdd2 <- parallelize(sc, 1000:1004) +# collect(zipRDD(rdd1, rdd2)) +# # list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004)) +#} +# @rdname zipRDD +# @aliases zipRDD,RDD setMethod("zipRDD", signature(x = "RDD", other = "RDD"), function(x, other) { @@ -1502,24 +1502,24 @@ setMethod("zipRDD", mergePartitions(rdd, TRUE) }) -#' Cartesian product of this RDD and another one. -#' -#' Return the Cartesian product of this RDD and another one, -#' that is, the RDD of all pairs of elements (a, b) where a -#' is in this and b is in other. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @return A new RDD which is the Cartesian product of these two RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:2) -#' sortByKey(cartesian(rdd, rdd)) -#' # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2)) -#'} -#' @rdname cartesian -#' @aliases cartesian,RDD,RDD-method +# Cartesian product of this RDD and another one. +# +# Return the Cartesian product of this RDD and another one, +# that is, the RDD of all pairs of elements (a, b) where a +# is in this and b is in other. +# +# @param x An RDD. +# @param other An RDD. +# @return A new RDD which is the Cartesian product of these two RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:2) +# sortByKey(cartesian(rdd, rdd)) +# # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2)) +#} +# @rdname cartesian +# @aliases cartesian,RDD,RDD-method setMethod("cartesian", signature(x = "RDD", other = "RDD"), function(x, other) { @@ -1532,24 +1532,24 @@ setMethod("cartesian", mergePartitions(rdd, FALSE) }) -#' Subtract an RDD with another RDD. -#' -#' Return an RDD with the elements from this that are not in other. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @param numPartitions Number of the partitions in the result RDD. -#' @return An RDD with the elements from this that are not in other. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4)) -#' rdd2 <- parallelize(sc, list(2, 4)) -#' collect(subtract(rdd1, rdd2)) -#' # list(1, 1, 3) -#'} -#' @rdname subtract -#' @aliases subtract,RDD +# Subtract an RDD with another RDD. +# +# Return an RDD with the elements from this that are not in other. +# +# @param x An RDD. +# @param other An RDD. +# @param numPartitions Number of the partitions in the result RDD. +# @return An RDD with the elements from this that are not in other. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4)) +# rdd2 <- parallelize(sc, list(2, 4)) +# collect(subtract(rdd1, rdd2)) +# # list(1, 1, 3) +#} +# @rdname subtract +# @aliases subtract,RDD setMethod("subtract", signature(x = "RDD", other = "RDD"), function(x, other, numPartitions = SparkR:::numPartitions(x)) { @@ -1559,28 +1559,28 @@ setMethod("subtract", keys(subtractByKey(rdd1, rdd2, numPartitions)) }) -#' Intersection of this RDD and another one. -#' -#' Return the intersection of this RDD and another one. -#' The output will not contain any duplicate elements, -#' even if the input RDDs did. Performs a hash partition -#' across the cluster. -#' Note that this method performs a shuffle internally. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @param numPartitions The number of partitions in the result RDD. -#' @return An RDD which is the intersection of these two RDDs. 
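A detail of subtract() that is easy to miss: duplicates on the left survive unless matched in `other`, whereas intersection() deduplicates its output. A small sketch assuming the usual session:

sc <- sparkR.init()
rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4))
rdd2 <- parallelize(sc, list(2, 4))
collect(subtract(rdd1, rdd2))      # list(1, 1, 3): both copies of 1 remain
collect(intersection(rdd1, rdd2))  # 2 and 4, duplicates removed, order not guaranteed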
-#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5)) -#' rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8)) -#' collect(sortBy(intersection(rdd1, rdd2), function(x) { x })) -#' # list(1, 2, 3) -#'} -#' @rdname intersection -#' @aliases intersection,RDD +# Intersection of this RDD and another one. +# +# Return the intersection of this RDD and another one. +# The output will not contain any duplicate elements, +# even if the input RDDs did. Performs a hash partition +# across the cluster. +# Note that this method performs a shuffle internally. +# +# @param x An RDD. +# @param other An RDD. +# @param numPartitions The number of partitions in the result RDD. +# @return An RDD which is the intersection of these two RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5)) +# rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8)) +# collect(sortBy(intersection(rdd1, rdd2), function(x) { x })) +# # list(1, 2, 3) +#} +# @rdname intersection +# @aliases intersection,RDD setMethod("intersection", signature(x = "RDD", other = "RDD"), function(x, other, numPartitions = SparkR:::numPartitions(x)) { @@ -1596,26 +1596,26 @@ setMethod("intersection", keys(filterRDD(cogroup(rdd1, rdd2, numPartitions = numPartitions), filterFunction)) }) -#' Zips an RDD's partitions with one (or more) RDD(s). -#' Same as zipPartitions in Spark. -#' -#' @param ... RDDs to be zipped. -#' @param func A function to transform zipped partitions. -#' @return A new RDD by applying a function to the zipped partitions. -#' Assumes that all the RDDs have the *same number of partitions*, but -#' does *not* require them to have the same number of elements in each partition. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 -#' rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 -#' rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 -#' collect(zipPartitions(rdd1, rdd2, rdd3, -#' func = function(x, y, z) { list(list(x, y, z))} )) -#' # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6))) -#'} -#' @rdname zipRDD -#' @aliases zipPartitions,RDD +# Zips an RDD's partitions with one (or more) RDD(s). +# Same as zipPartitions in Spark. +# +# @param ... RDDs to be zipped. +# @param func A function to transform zipped partitions. +# @return A new RDD by applying a function to the zipped partitions. +# Assumes that all the RDDs have the *same number of partitions*, but +# does *not* require them to have the same number of elements in each partition. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 +# rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 +# rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 +# collect(zipPartitions(rdd1, rdd2, rdd3, +# func = function(x, y, z) { list(list(x, y, z))} )) +# # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6))) +#} +# @rdname zipRDD +# @aliases zipPartitions,RDD setMethod("zipPartitions", "RDD", function(..., func) { diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 4f05ba524a01a..cae06e6af2bff 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -150,21 +150,21 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) { dataFrame(sdf) } -#' toDF -#' -#' Converts an RDD to a DataFrame by infer the types. 
-#'
-#' @param x An RDD
-#'
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
-#' df <- toDF(rdd)
-#' }
+# toDF
+#
+# Converts an RDD to a DataFrame by inferring the types.
+#
+# @param x An RDD
+#
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
+# df <- toDF(rdd)
+# }
 setGeneric("toDF", function(x, ...) { standardGeneric("toDF") })
@@ -207,23 +207,23 @@ jsonFile <- function(sqlCtx, path) {
 }

-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a DataFrame.
-#'
-#' @param sqlCtx SQLContext to use
-#' @param rdd An RDD of JSON string
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The ratio of simpling used to infer the schema
-#' @return A DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- texFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlCtx, rdd)
-#' }
+# JSON RDD
+#
+# Loads an RDD storing one JSON object per string as a DataFrame.
+#
+# @param sqlCtx SQLContext to use
+# @param rdd An RDD of JSON strings
+# @param schema A StructType object to use as schema
+# @param samplingRatio The sampling ratio used to infer the schema
+# @return A DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- textFile(sc, "path/to/json")
+# df <- jsonRDD(sqlCtx, rdd)
+# }
 # TODO: support schema
 jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) {
diff --git a/R/pkg/R/broadcast.R b/R/pkg/R/broadcast.R
index 583fa2e7fdcfd..23dc38780716e 100644
--- a/R/pkg/R/broadcast.R
+++ b/R/pkg/R/broadcast.R
@@ -23,21 +23,21 @@
 .broadcastValues <- new.env()
 .broadcastIdToName <- new.env()

-#' @title S4 class that represents a Broadcast variable
-#' @description Broadcast variables can be created using the broadcast
-#' function from a \code{SparkContext}.
-#' @rdname broadcast-class
-#' @seealso broadcast
-#'
-#' @param id Id of the backing Spark broadcast variable
-#' @export
+# @title S4 class that represents a Broadcast variable
+# @description Broadcast variables can be created using the broadcast
+# function from a \code{SparkContext}.
+# @rdname broadcast-class
+# @seealso broadcast
+#
+# @param id Id of the backing Spark broadcast variable
+# @export
 setClass("Broadcast", slots = list(id = "character"))

-#' @rdname broadcast-class
-#' @param value Value of the broadcast variable
-#' @param jBroadcastRef reference to the backing Java broadcast object
-#' @param objName name of broadcasted object
-#' @export
+# @rdname broadcast-class
+# @param value Value of the broadcast variable
+# @param jBroadcastRef reference to the backing Java broadcast object
+# @param objName name of broadcasted object
+# @export
 Broadcast <- function(id, value, jBroadcastRef, objName) {
   .broadcastValues[[id]] <- value
   .broadcastNames[[as.character(objName)]] <- jBroadcastRef
@@ -45,13 +45,13 @@ Broadcast <- function(id, value, jBroadcastRef, objName) {
   new("Broadcast", id = id)
 }

-#' @description
-#' \code{value} can be used to get the value of a broadcast variable inside
-#' a distributed function.
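Since jsonRDD() accepts any RDD of JSON strings, the input need not come from a file at all; parallelize() works just as well for a quick test. A minimal sketch, assuming a SQLContext initialized as in the examples:

sc <- sparkR.init()
sqlCtx <- sparkRSQL.init(sc)
jsons <- parallelize(sc, list('{"name":"a","age":1}', '{"name":"b","age":2}'))
df <- jsonRDD(sqlCtx, jsons)  # schema inferred by sampling the strings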
-#' -#' @param bcast The broadcast variable to get -#' @rdname broadcast -#' @aliases value,Broadcast-method +# @description +# \code{value} can be used to get the value of a broadcast variable inside +# a distributed function. +# +# @param bcast The broadcast variable to get +# @rdname broadcast +# @aliases value,Broadcast-method setMethod("value", signature(bcast = "Broadcast"), function(bcast) { @@ -62,24 +62,24 @@ setMethod("value", } }) -#' Internal function to set values of a broadcast variable. -#' -#' This function is used internally by Spark to set the value of a broadcast -#' variable on workers. Not intended for use outside the package. -#' -#' @rdname broadcast-internal -#' @seealso broadcast, value +# Internal function to set values of a broadcast variable. +# +# This function is used internally by Spark to set the value of a broadcast +# variable on workers. Not intended for use outside the package. +# +# @rdname broadcast-internal +# @seealso broadcast, value -#' @param bcastId The id of broadcast variable to set -#' @param value The value to be set -#' @export +# @param bcastId The id of broadcast variable to set +# @param value The value to be set +# @export setBroadcastValue <- function(bcastId, value) { bcastIdStr <- as.character(bcastId) .broadcastValues[[bcastIdStr]] <- value } -#' Helper function to clear the list of broadcast variables we know about -#' Should be called when the SparkR JVM backend is shutdown +# Helper function to clear the list of broadcast variables we know about +# Should be called when the SparkR JVM backend is shutdown clearBroadcastVariables <- function() { bcasts <- ls(.broadcastNames) rm(list = bcasts, envir = .broadcastNames) diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index b4845b6948997..43be9c904fdf6 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -25,27 +25,27 @@ getMinPartitions <- function(sc, minPartitions) { as.integer(minPartitions) } -#' Create an RDD from a text file. -#' -#' This function reads a text file from HDFS, a local file system (available on all -#' nodes), or any Hadoop-supported file system URI, and creates an -#' RDD of strings from it. -#' -#' @param sc SparkContext to use -#' @param path Path of file to read. A vector of multiple paths is allowed. -#' @param minPartitions Minimum number of partitions to be created. If NULL, the default -#' value is chosen based on available parallelism. -#' @return RDD where each item is of type \code{character} -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' lines <- textFile(sc, "myfile.txt") -#'} +# Create an RDD from a text file. +# +# This function reads a text file from HDFS, a local file system (available on all +# nodes), or any Hadoop-supported file system URI, and creates an +# RDD of strings from it. +# +# @param sc SparkContext to use +# @param path Path of file to read. A vector of multiple paths is allowed. +# @param minPartitions Minimum number of partitions to be created. If NULL, the default +# value is chosen based on available parallelism. 
+# @return RDD where each item is of type \code{character} +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# lines <- textFile(sc, "myfile.txt") +#} textFile <- function(sc, path, minPartitions = NULL) { # Allow the user to have a more flexible definiton of the text file path path <- suppressWarnings(normalizePath(path)) - #' Convert a string vector of paths to a string containing comma separated paths + # Convert a string vector of paths to a string containing comma separated paths path <- paste(path, collapse = ",") jrdd <- callJMethod(sc, "textFile", path, getMinPartitions(sc, minPartitions)) @@ -53,27 +53,27 @@ textFile <- function(sc, path, minPartitions = NULL) { RDD(jrdd, "string") } -#' Load an RDD saved as a SequenceFile containing serialized objects. -#' -#' The file to be loaded should be one that was previously generated by calling -#' saveAsObjectFile() of the RDD class. -#' -#' @param sc SparkContext to use -#' @param path Path of file to read. A vector of multiple paths is allowed. -#' @param minPartitions Minimum number of partitions to be created. If NULL, the default -#' value is chosen based on available parallelism. -#' @return RDD containing serialized R objects. -#' @seealso saveAsObjectFile -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- objectFile(sc, "myfile") -#'} +# Load an RDD saved as a SequenceFile containing serialized objects. +# +# The file to be loaded should be one that was previously generated by calling +# saveAsObjectFile() of the RDD class. +# +# @param sc SparkContext to use +# @param path Path of file to read. A vector of multiple paths is allowed. +# @param minPartitions Minimum number of partitions to be created. If NULL, the default +# value is chosen based on available parallelism. +# @return RDD containing serialized R objects. +# @seealso saveAsObjectFile +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- objectFile(sc, "myfile") +#} objectFile <- function(sc, path, minPartitions = NULL) { # Allow the user to have a more flexible definiton of the text file path path <- suppressWarnings(normalizePath(path)) - #' Convert a string vector of paths to a string containing comma separated paths + # Convert a string vector of paths to a string containing comma separated paths path <- paste(path, collapse = ",") jrdd <- callJMethod(sc, "objectFile", path, getMinPartitions(sc, minPartitions)) @@ -81,24 +81,24 @@ objectFile <- function(sc, path, minPartitions = NULL) { RDD(jrdd, "byte") } -#' Create an RDD from a homogeneous list or vector. -#' -#' This function creates an RDD from a local homogeneous list in R. The elements -#' in the list are split into \code{numSlices} slices and distributed to nodes -#' in the cluster. -#' -#' @param sc SparkContext to use -#' @param coll collection to parallelize -#' @param numSlices number of partitions to create in the RDD -#' @return an RDD created from this collection -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2) -#' # The RDD should contain 10 elements -#' length(rdd) -#'} +# Create an RDD from a homogeneous list or vector. +# +# This function creates an RDD from a local homogeneous list in R. The elements +# in the list are split into \code{numSlices} slices and distributed to nodes +# in the cluster. 
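objectFile() pairs with saveAsObjectFile() for a serialize/deserialize roundtrip of arbitrary R objects; a minimal sketch (the /tmp path is illustrative):

sc <- sparkR.init()
rdd <- parallelize(sc, list(1, 2, 3))
saveAsObjectFile(rdd, "/tmp/sparkR-objects")
restored <- objectFile(sc, "/tmp/sparkR-objects")
collect(restored)  # list(1, 2, 3)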
+# +# @param sc SparkContext to use +# @param coll collection to parallelize +# @param numSlices number of partitions to create in the RDD +# @return an RDD created from this collection +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2) +# # The RDD should contain 10 elements +# length(rdd) +#} parallelize <- function(sc, coll, numSlices = 1) { # TODO: bound/safeguard numSlices # TODO: unit tests for if the split works for all primitives @@ -133,33 +133,33 @@ parallelize <- function(sc, coll, numSlices = 1) { RDD(jrdd, "byte") } -#' Include this specified package on all workers -#' -#' This function can be used to include a package on all workers before the -#' user's code is executed. This is useful in scenarios where other R package -#' functions are used in a function passed to functions like \code{lapply}. -#' NOTE: The package is assumed to be installed on every node in the Spark -#' cluster. -#' -#' @param sc SparkContext to use -#' @param pkg Package name -#' -#' @export -#' @examples -#'\dontrun{ -#' library(Matrix) -#' -#' sc <- sparkR.init() -#' # Include the matrix library we will be using -#' includePackage(sc, Matrix) -#' -#' generateSparse <- function(x) { -#' sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3)) -#' } -#' -#' rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse) -#' collect(rdd) -#'} +# Include this specified package on all workers +# +# This function can be used to include a package on all workers before the +# user's code is executed. This is useful in scenarios where other R package +# functions are used in a function passed to functions like \code{lapply}. +# NOTE: The package is assumed to be installed on every node in the Spark +# cluster. +# +# @param sc SparkContext to use +# @param pkg Package name +# +# @export +# @examples +#\dontrun{ +# library(Matrix) +# +# sc <- sparkR.init() +# # Include the matrix library we will be using +# includePackage(sc, Matrix) +# +# generateSparse <- function(x) { +# sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3)) +# } +# +# rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse) +# collect(rdd) +#} includePackage <- function(sc, pkg) { pkg <- as.character(substitute(pkg)) if (exists(".packages", .sparkREnv)) { @@ -171,30 +171,30 @@ includePackage <- function(sc, pkg) { .sparkREnv$.packages <- packages } -#' @title Broadcast a variable to all workers -#' -#' @description -#' Broadcast a read-only variable to the cluster, returning a \code{Broadcast} -#' object for reading it in distributed functions. -#' -#' @param sc Spark Context to use -#' @param object Object to be broadcast -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:2, 2L) -#' -#' # Large Matrix object that we want to broadcast -#' randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000)) -#' randomMatBr <- broadcast(sc, randomMat) -#' -#' # Use the broadcast variable inside the function -#' useBroadcast <- function(x) { -#' sum(value(randomMatBr) * x) -#' } -#' sumRDD <- lapply(rdd, useBroadcast) -#'} +# @title Broadcast a variable to all workers +# +# @description +# Broadcast a read-only variable to the cluster, returning a \code{Broadcast} +# object for reading it in distributed functions. 
+# +# @param sc Spark Context to use +# @param object Object to be broadcast +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:2, 2L) +# +# # Large Matrix object that we want to broadcast +# randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000)) +# randomMatBr <- broadcast(sc, randomMat) +# +# # Use the broadcast variable inside the function +# useBroadcast <- function(x) { +# sum(value(randomMatBr) * x) +# } +# sumRDD <- lapply(rdd, useBroadcast) +#} broadcast <- function(sc, object) { objName <- as.character(substitute(object)) serializedObj <- serialize(object, connection = NULL) @@ -205,21 +205,21 @@ broadcast <- function(sc, object) { Broadcast(id, object, jBroadcast, objName) } -#' @title Set the checkpoint directory -#' -#' Set the directory under which RDDs are going to be checkpointed. The -#' directory must be a HDFS path if running on a cluster. -#' -#' @param sc Spark Context to use -#' @param dirName Directory path -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' setCheckpointDir(sc, "~/checkpoint") -#' rdd <- parallelize(sc, 1:2, 2L) -#' checkpoint(rdd) -#'} +# @title Set the checkpoint directory +# +# Set the directory under which RDDs are going to be checkpointed. The +# directory must be a HDFS path if running on a cluster. +# +# @param sc Spark Context to use +# @param dirName Directory path +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# setCheckpointDir(sc, "~/checkpoint") +# rdd <- parallelize(sc, 1:2, 2L) +# checkpoint(rdd) +#} setCheckpointDir <- function(sc, dirName) { invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName)))) } diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5838955f74dad..380e8ebe8c8f4 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -17,353 +17,353 @@ ############ RDD Actions and Transformations ############ -#' @rdname aggregateRDD -#' @seealso reduce -#' @export +# @rdname aggregateRDD +# @seealso reduce +# @export setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") }) -#' @rdname cache-methods -#' @export +# @rdname cache-methods +# @export setGeneric("cache", function(x) { standardGeneric("cache") }) -#' @rdname coalesce -#' @seealso repartition -#' @export +# @rdname coalesce +# @seealso repartition +# @export setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") }) -#' @rdname checkpoint-methods -#' @export +# @rdname checkpoint-methods +# @export setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") }) -#' @rdname collect-methods -#' @export +# @rdname collect-methods +# @export setGeneric("collect", function(x, ...) 
{ standardGeneric("collect") }) -#' @rdname collect-methods -#' @export +# @rdname collect-methods +# @export setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") }) -#' @rdname collect-methods -#' @export +# @rdname collect-methods +# @export setGeneric("collectPartition", function(x, partitionId) { standardGeneric("collectPartition") }) -#' @rdname count -#' @export +# @rdname count +# @export setGeneric("count", function(x) { standardGeneric("count") }) -#' @rdname countByValue -#' @export +# @rdname countByValue +# @export setGeneric("countByValue", function(x) { standardGeneric("countByValue") }) -#' @rdname distinct -#' @export +# @rdname distinct +# @export setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") }) -#' @rdname filterRDD -#' @export +# @rdname filterRDD +# @export setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") }) -#' @rdname first -#' @export +# @rdname first +# @export setGeneric("first", function(x) { standardGeneric("first") }) -#' @rdname flatMap -#' @export +# @rdname flatMap +# @export setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") }) -#' @rdname fold -#' @seealso reduce -#' @export +# @rdname fold +# @seealso reduce +# @export setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") }) -#' @rdname foreach -#' @export +# @rdname foreach +# @export setGeneric("foreach", function(x, func) { standardGeneric("foreach") }) -#' @rdname foreach -#' @export +# @rdname foreach +# @export setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") }) # The jrdd accessor function. setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") }) -#' @rdname glom -#' @export +# @rdname glom +# @export setGeneric("glom", function(x) { standardGeneric("glom") }) -#' @rdname keyBy -#' @export +# @rdname keyBy +# @export setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") }) -#' @rdname lapplyPartition -#' @export +# @rdname lapplyPartition +# @export setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") }) -#' @rdname lapplyPartitionsWithIndex -#' @export +# @rdname lapplyPartitionsWithIndex +# @export setGeneric("lapplyPartitionsWithIndex", function(X, FUN) { standardGeneric("lapplyPartitionsWithIndex") }) -#' @rdname lapply -#' @export +# @rdname lapply +# @export setGeneric("map", function(X, FUN) { standardGeneric("map") }) -#' @rdname lapplyPartition -#' @export +# @rdname lapplyPartition +# @export setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") }) -#' @rdname lapplyPartitionsWithIndex -#' @export +# @rdname lapplyPartitionsWithIndex +# @export setGeneric("mapPartitionsWithIndex", function(X, FUN) { standardGeneric("mapPartitionsWithIndex") }) -#' @rdname maximum -#' @export +# @rdname maximum +# @export setGeneric("maximum", function(x) { standardGeneric("maximum") }) -#' @rdname minimum -#' @export +# @rdname minimum +# @export setGeneric("minimum", function(x) { standardGeneric("minimum") }) -#' @rdname sumRDD -#' @export +# @rdname sumRDD +# @export setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") }) -#' @rdname name -#' @export +# @rdname name +# @export setGeneric("name", function(x) { standardGeneric("name") }) -#' @rdname numPartitions -#' @export +# @rdname numPartitions +# @export setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") }) -#' @rdname persist -#' @export +# @rdname persist +# @export 
setGeneric("persist", function(x, newLevel) { standardGeneric("persist") }) -#' @rdname pipeRDD -#' @export +# @rdname pipeRDD +# @export setGeneric("pipeRDD", function(x, command, env = list()) { standardGeneric("pipeRDD")}) -#' @rdname reduce -#' @export +# @rdname reduce +# @export setGeneric("reduce", function(x, func) { standardGeneric("reduce") }) -#' @rdname repartition -#' @seealso coalesce -#' @export +# @rdname repartition +# @seealso coalesce +# @export setGeneric("repartition", function(x, numPartitions) { standardGeneric("repartition") }) -#' @rdname sampleRDD -#' @export +# @rdname sampleRDD +# @export setGeneric("sampleRDD", function(x, withReplacement, fraction, seed) { standardGeneric("sampleRDD") }) -#' @rdname saveAsObjectFile -#' @seealso objectFile -#' @export +# @rdname saveAsObjectFile +# @seealso objectFile +# @export setGeneric("saveAsObjectFile", function(x, path) { standardGeneric("saveAsObjectFile") }) -#' @rdname saveAsTextFile -#' @export +# @rdname saveAsTextFile +# @export setGeneric("saveAsTextFile", function(x, path) { standardGeneric("saveAsTextFile") }) -#' @rdname setName -#' @export +# @rdname setName +# @export setGeneric("setName", function(x, name) { standardGeneric("setName") }) -#' @rdname sortBy -#' @export +# @rdname sortBy +# @export setGeneric("sortBy", function(x, func, ascending = TRUE, numPartitions = 1) { standardGeneric("sortBy") }) -#' @rdname take -#' @export +# @rdname take +# @export setGeneric("take", function(x, num) { standardGeneric("take") }) -#' @rdname takeOrdered -#' @export +# @rdname takeOrdered +# @export setGeneric("takeOrdered", function(x, num) { standardGeneric("takeOrdered") }) -#' @rdname takeSample -#' @export +# @rdname takeSample +# @export setGeneric("takeSample", function(x, withReplacement, num, seed) { standardGeneric("takeSample") }) -#' @rdname top -#' @export +# @rdname top +# @export setGeneric("top", function(x, num) { standardGeneric("top") }) -#' @rdname unionRDD -#' @export +# @rdname unionRDD +# @export setGeneric("unionRDD", function(x, y) { standardGeneric("unionRDD") }) -#' @rdname unpersist-methods -#' @export +# @rdname unpersist-methods +# @export setGeneric("unpersist", function(x, ...) 
{ standardGeneric("unpersist") }) -#' @rdname zipRDD -#' @export +# @rdname zipRDD +# @export setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") }) -#' @rdname zipRDD -#' @export +# @rdname zipRDD +# @export setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") }, signature = "...") -#' @rdname zipWithIndex -#' @seealso zipWithUniqueId -#' @export +# @rdname zipWithIndex +# @seealso zipWithUniqueId +# @export setGeneric("zipWithIndex", function(x) { standardGeneric("zipWithIndex") }) -#' @rdname zipWithUniqueId -#' @seealso zipWithIndex -#' @export +# @rdname zipWithUniqueId +# @seealso zipWithIndex +# @export setGeneric("zipWithUniqueId", function(x) { standardGeneric("zipWithUniqueId") }) ############ Binary Functions ############# -#' @rdname cartesian -#' @export +# @rdname cartesian +# @export setGeneric("cartesian", function(x, other) { standardGeneric("cartesian") }) -#' @rdname countByKey -#' @export +# @rdname countByKey +# @export setGeneric("countByKey", function(x) { standardGeneric("countByKey") }) -#' @rdname flatMapValues -#' @export +# @rdname flatMapValues +# @export setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") }) -#' @rdname intersection -#' @export +# @rdname intersection +# @export setGeneric("intersection", function(x, other, numPartitions = 1) { standardGeneric("intersection") }) -#' @rdname keys -#' @export +# @rdname keys +# @export setGeneric("keys", function(x) { standardGeneric("keys") }) -#' @rdname lookup -#' @export +# @rdname lookup +# @export setGeneric("lookup", function(x, key) { standardGeneric("lookup") }) -#' @rdname mapValues -#' @export +# @rdname mapValues +# @export setGeneric("mapValues", function(X, FUN) { standardGeneric("mapValues") }) -#' @rdname sampleByKey -#' @export +# @rdname sampleByKey +# @export setGeneric("sampleByKey", function(x, withReplacement, fractions, seed) { standardGeneric("sampleByKey") }) -#' @rdname values -#' @export +# @rdname values +# @export setGeneric("values", function(x) { standardGeneric("values") }) ############ Shuffle Functions ############ -#' @rdname aggregateByKey -#' @seealso foldByKey, combineByKey -#' @export +# @rdname aggregateByKey +# @seealso foldByKey, combineByKey +# @export setGeneric("aggregateByKey", function(x, zeroValue, seqOp, combOp, numPartitions) { standardGeneric("aggregateByKey") }) -#' @rdname cogroup -#' @export +# @rdname cogroup +# @export setGeneric("cogroup", function(..., numPartitions) { standardGeneric("cogroup") }, signature = "...") -#' @rdname combineByKey -#' @seealso groupByKey, reduceByKey -#' @export +# @rdname combineByKey +# @seealso groupByKey, reduceByKey +# @export setGeneric("combineByKey", function(x, createCombiner, mergeValue, mergeCombiners, numPartitions) { standardGeneric("combineByKey") }) -#' @rdname foldByKey -#' @seealso aggregateByKey, combineByKey -#' @export +# @rdname foldByKey +# @seealso aggregateByKey, combineByKey +# @export setGeneric("foldByKey", function(x, zeroValue, func, numPartitions) { standardGeneric("foldByKey") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("fullOuterJoin", function(x, y, numPartitions) { standardGeneric("fullOuterJoin") }) -#' @rdname groupByKey -#' @seealso reduceByKey -#' @export +# @rdname groupByKey +# @seealso reduceByKey +# @export setGeneric("groupByKey", function(x, numPartitions) { standardGeneric("groupByKey") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export 
setGeneric("join", function(x, y, ...) { standardGeneric("join") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("leftOuterJoin", function(x, y, numPartitions) { standardGeneric("leftOuterJoin") }) -#' @rdname partitionBy -#' @export +# @rdname partitionBy +# @export setGeneric("partitionBy", function(x, numPartitions, ...) { standardGeneric("partitionBy") }) -#' @rdname reduceByKey -#' @seealso groupByKey -#' @export +# @rdname reduceByKey +# @seealso groupByKey +# @export setGeneric("reduceByKey", function(x, combineFunc, numPartitions) { standardGeneric("reduceByKey")}) -#' @rdname reduceByKeyLocally -#' @seealso reduceByKey -#' @export +# @rdname reduceByKeyLocally +# @seealso reduceByKey +# @export setGeneric("reduceByKeyLocally", function(x, combineFunc) { standardGeneric("reduceByKeyLocally") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("rightOuterJoin", function(x, y, numPartitions) { standardGeneric("rightOuterJoin") }) -#' @rdname sortByKey -#' @export +# @rdname sortByKey +# @export setGeneric("sortByKey", function(x, ascending = TRUE, numPartitions = 1) { standardGeneric("sortByKey") }) -#' @rdname subtract -#' @export +# @rdname subtract +# @export setGeneric("subtract", function(x, other, numPartitions = 1) { standardGeneric("subtract") }) -#' @rdname subtractByKey -#' @export +# @rdname subtractByKey +# @export setGeneric("subtractByKey", function(x, other, numPartitions = 1) { standardGeneric("subtractByKey") @@ -372,8 +372,8 @@ setGeneric("subtractByKey", ################### Broadcast Variable Methods ################# -#' @rdname broadcast -#' @export +# @rdname broadcast +# @export setGeneric("value", function(bcast) { standardGeneric("value") }) @@ -477,8 +477,8 @@ setGeneric("showDF", function(x,...) { standardGeneric("showDF") }) #' @export setGeneric("sortDF", function(x, col, ...) { standardGeneric("sortDF") }) -#' @rdname tojson -#' @export +# @rdname tojson +# @export setGeneric("toJSON", function(x) { standardGeneric("toJSON") }) #' @rdname DataFrame diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index edeb8d9f75f7b..7694652856da5 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -21,23 +21,23 @@ NULL ############ Actions and Transformations ############ -#' Look up elements of a key in an RDD -#' -#' @description -#' \code{lookup} returns a list of values in this RDD for key key. -#' -#' @param x The RDD to collect -#' @param key The key to look up for -#' @return a list of values in this RDD for key key -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(c(1, 1), c(2, 2), c(1, 3)) -#' rdd <- parallelize(sc, pairs) -#' lookup(rdd, 1) # list(1, 3) -#'} -#' @rdname lookup -#' @aliases lookup,RDD-method +# Look up elements of a key in an RDD +# +# @description +# \code{lookup} returns a list of values in this RDD for key key. +# +# @param x The RDD to collect +# @param key The key to look up for +# @return a list of values in this RDD for key key +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(c(1, 1), c(2, 2), c(1, 3)) +# rdd <- parallelize(sc, pairs) +# lookup(rdd, 1) # list(1, 3) +#} +# @rdname lookup +# @aliases lookup,RDD-method setMethod("lookup", signature(x = "RDD", key = "ANY"), function(x, key) { @@ -49,21 +49,21 @@ setMethod("lookup", collect(valsRDD) }) -#' Count the number of elements for each key, and return the result to the -#' master as lists of (key, count) pairs. -#' -#' Same as countByKey in Spark. 
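lookup() and countByKey() answer complementary questions about a pair RDD: the values behind one key versus the cardinality of every key. A combined sketch assuming the usual session:

sc <- sparkR.init()
rdd <- parallelize(sc, list(list("a", 1), list("b", 1), list("a", 3)))
lookup(rdd, "a")  # list(1, 3): all values stored under key "a"
countByKey(rdd)   # ("a", 2L), ("b", 1L), returned to the driver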
-#'
-#' @param x The RDD to count keys.
-#' @return list of (key, count) pairs, where count is number of each key in rdd.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(c("a", 1), c("b", 1), c("a", 1)))
-#' countByKey(rdd) # ("a", 2L), ("b", 1L)
-#'}
-#' @rdname countByKey
-#' @aliases countByKey,RDD-method
+# Count the number of elements for each key, and return the result to the
+# master as lists of (key, count) pairs.
+#
+# Same as countByKey in Spark.
+#
+# @param x The RDD to count keys.
+# @return A list of (key, count) pairs, where count is the number of occurrences of each key in the RDD.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(c("a", 1), c("b", 1), c("a", 1)))
+# countByKey(rdd) # ("a", 2L), ("b", 1L)
+#}
+# @rdname countByKey
+# @aliases countByKey,RDD-method
 setMethod("countByKey",
           signature(x = "RDD"),
           function(x) {
@@ -71,17 +71,17 @@ setMethod("countByKey",
             countByValue(keys)
           })

-#' Return an RDD with the keys of each tuple.
-#'
-#' @param x The RDD from which the keys of each tuple is returned.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4)))
-#' collect(keys(rdd)) # list(1, 3)
-#'}
-#' @rdname keys
-#' @aliases keys,RDD
+# Return an RDD with the keys of each tuple.
+#
+# @param x The RDD from which the keys of each tuple is returned.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(list(1, 2), list(3, 4)))
+# collect(keys(rdd)) # list(1, 3)
+#}
+# @rdname keys
+# @aliases keys,RDD
 setMethod("keys",
           signature(x = "RDD"),
           function(x) {
@@ -91,17 +91,17 @@ setMethod("keys",
             lapply(x, func)
           })

-#' Return an RDD with the values of each tuple.
-#'
-#' @param x The RDD from which the values of each tuple is returned.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4)))
-#' collect(values(rdd)) # list(2, 4)
-#'}
-#' @rdname values
-#' @aliases values,RDD
+# Return an RDD with the values of each tuple.
+#
+# @param x The RDD from which the values of each tuple is returned.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(list(1, 2), list(3, 4)))
+# collect(values(rdd)) # list(2, 4)
+#}
+# @rdname values
+# @aliases values,RDD
 setMethod("values",
           signature(x = "RDD"),
           function(x) {
@@ -111,23 +111,23 @@ setMethod("values",
             lapply(x, func)
           })

-#' Applies a function to all values of the elements, without modifying the keys.
-#'
-#' The same as `mapValues()' in Spark.
-#'
-#' @param X The RDD to apply the transformation.
-#' @param FUN the transformation to apply on the value of each element.
-#' @return a new RDD created by the transformation.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' makePairs <- lapply(rdd, function(x) { list(x, x) })
-#' collect(mapValues(makePairs, function(x) { x * 2) })
-#' Output: list(list(1,2), list(2,4), list(3,6), ...)
-#'}
-#' @rdname mapValues
-#' @aliases mapValues,RDD,function-method
+# Applies a function to all values of the elements, without modifying the keys.
+#
+# The same as 'mapValues()' in Spark.
+#
+# @param X The RDD to apply the transformation.
+# @param FUN the transformation to apply on the value of each element.
+# @return a new RDD created by the transformation.
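mapValues() rewrites only the value slot of each pair and leaves keys (and partitioning) untouched; a minimal runnable sketch assuming the usual session:

sc <- sparkR.init()
rdd <- parallelize(sc, 1:3)
pairs <- lapply(rdd, function(x) { list(x, x) })
collect(mapValues(pairs, function(v) { v * 10 }))
# list(list(1, 10), list(2, 20), list(3, 30)): keys unchanged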
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# makePairs <- lapply(rdd, function(x) { list(x, x) })
+# collect(mapValues(makePairs, function(x) { x * 2 }))
+# Output: list(list(1,2), list(2,4), list(3,6), ...)
+#}
+# @rdname mapValues
+# @aliases mapValues,RDD,function-method
 setMethod("mapValues",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
@@ -137,23 +137,23 @@ setMethod("mapValues",
             lapply(X, func)
           })

-#' Pass each value in the key-value pair RDD through a flatMap function without
-#' changing the keys; this also retains the original RDD's partitioning.
-#'
-#' The same as 'flatMapValues()' in Spark.
-#'
-#' @param X The RDD to apply the transformation.
-#' @param FUN the transformation to apply on the value of each element.
-#' @return a new RDD created by the transformation.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(list(1, c(1,2)), list(2, c(3,4))))
-#' collect(flatMapValues(rdd, function(x) { x }))
-#' Output: list(list(1,1), list(1,2), list(2,3), list(2,4))
-#'}
-#' @rdname flatMapValues
-#' @aliases flatMapValues,RDD,function-method
+# Pass each value in the key-value pair RDD through a flatMap function without
+# changing the keys; this also retains the original RDD's partitioning.
+#
+# The same as 'flatMapValues()' in Spark.
+#
+# @param X The RDD to apply the transformation.
+# @param FUN the transformation to apply on the value of each element.
+# @return a new RDD created by the transformation.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(list(1, c(1,2)), list(2, c(3,4))))
+# collect(flatMapValues(rdd, function(x) { x }))
+# Output: list(list(1,1), list(1,2), list(2,3), list(2,4))
+#}
+# @rdname flatMapValues
+# @aliases flatMapValues,RDD,function-method
 setMethod("flatMapValues",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
@@ -165,30 +165,30 @@ setMethod("flatMapValues",

 ############ Shuffle Functions ############

-#' Partition an RDD by key
-#'
-#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
-#' For each element of this RDD, the partitioner is used to compute a hash
-#' function and the RDD is partitioned using this hash value.
-#'
-#' @param x The RDD to partition. Should be an RDD where each element is
-#' list(K, V) or c(K, V).
-#' @param numPartitions Number of partitions to create.
-#' @param ... Other optional arguments to partitionBy.
-#'
-#' @param partitionFunc The partition function to use. Uses a default hashCode
-#' function if not provided
-#' @return An RDD partitioned using the specified partitioner.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
-#' rdd <- parallelize(sc, pairs)
-#' parts <- partitionBy(rdd, 2L)
-#' collectPartition(parts, 0L) # First partition should contain list(1, 2) and list(1, 4)
-#'}
-#' @rdname partitionBy
-#' @aliases partitionBy,RDD,integer-method
+# Partition an RDD by key
+#
+# This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
+# For each element of this RDD, the partitioner is used to compute a hash
+# value, and the RDD is partitioned using this hash value.
+#
+# @param x The RDD to partition. Should be an RDD where each element is
+# list(K, V) or c(K, V).
+# @param numPartitions Number of partitions to create.
+# @param ... Other optional arguments to partitionBy.
+#
+# @param partitionFunc The partition function to use. Uses a default hashCode
+# function if not provided
+# @return An RDD partitioned using the specified partitioner.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
+# rdd <- parallelize(sc, pairs)
+# parts <- partitionBy(rdd, 2L)
+# collectPartition(parts, 0L) # First partition should contain list(1, 2) and list(1, 4)
+#}
+# @rdname partitionBy
+# @aliases partitionBy,RDD,integer-method
 setMethod("partitionBy",
           signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions, partitionFunc = hashCode) {
@@ -234,27 +234,27 @@ setMethod("partitionBy",
             RDD(r, serializedMode = "byte")
           })

-#' Group values by key
-#'
-#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
-#' and group values for each key in the RDD into a single sequence.
-#'
-#' @param x The RDD to group. Should be an RDD where each element is
-#' list(K, V) or c(K, V).
-#' @param numPartitions Number of partitions to create.
-#' @return An RDD where each element is list(K, list(V))
-#' @seealso reduceByKey
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
-#' rdd <- parallelize(sc, pairs)
-#' parts <- groupByKey(rdd, 2L)
-#' grouped <- collect(parts)
-#' grouped[[1]] # Should be a list(1, list(2, 4))
-#'}
-#' @rdname groupByKey
-#' @aliases groupByKey,RDD,integer-method
+# Group values by key
+#
+# This function operates on RDDs where every element is of the form list(K, V) or c(K, V),
+# and groups values for each key in the RDD into a single sequence.
+#
+# @param x The RDD to group. Should be an RDD where each element is
+# list(K, V) or c(K, V).
+# @param numPartitions Number of partitions to create.
+# @return An RDD where each element is list(K, list(V))
+# @seealso reduceByKey
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
+# rdd <- parallelize(sc, pairs)
+# parts <- groupByKey(rdd, 2L)
+# grouped <- collect(parts)
+# grouped[[1]] # Should be a list(1, list(2, 4))
+#}
+# @rdname groupByKey
+# @aliases groupByKey,RDD,integer-method
 setMethod("groupByKey",
           signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions) {
@@ -292,28 +292,28 @@ setMethod("groupByKey",
             lapplyPartition(shuffled, groupVals)
           })

-#' Merge values by key
-#'
-#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
-#' and merges the values for each key using an associative reduce function.
-#'
-#' @param x The RDD to reduce by key. Should be an RDD where each element is
-#' list(K, V) or c(K, V).
-#' @param combineFunc The associative reduce function to use.
-#' @param numPartitions Number of partitions to create.
-#' @return An RDD where each element is list(K, V') where V' is the merged
-#' value
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
-#' rdd <- parallelize(sc, pairs)
-#' parts <- reduceByKey(rdd, "+", 2L)
-#' reduced <- collect(parts)
-#' reduced[[1]] # Should be a list(1, 6)
-#'}
-#' @rdname reduceByKey
-#' @aliases reduceByKey,RDD,integer-method
+# Merge values by key
+#
+# This function operates on RDDs where every element is of the form list(K, V) or c(K, V),
+# and merges the values for each key using an associative reduce function.
+#
+# @param x The RDD to reduce by key. Should be an RDD where each element is
+# list(K, V) or c(K, V).
+# @param combineFunc The associative reduce function to use.
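reduceByKey() is the one-step spelling of groupByKey() followed by a per-key reduce; both produce the same totals, as this sketch under the usual session assumptions shows:

sc <- sparkR.init()
pairs <- parallelize(sc, list(list(1, 2), list(1, 4), list(2, 3)))
collect(reduceByKey(pairs, "+", 2L))  # list(list(1, 6), list(2, 3))
grouped <- groupByKey(pairs, 2L)
collect(mapValues(grouped, function(vs) { Reduce("+", vs) }))  # same totals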
+# @param numPartitions Number of partitions to create.
+# @return An RDD where each element is list(K, V') where V' is the merged
+# value
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
+# rdd <- parallelize(sc, pairs)
+# parts <- reduceByKey(rdd, "+", 2L)
+# reduced <- collect(parts)
+# reduced[[1]] # Should be a list(1, 6)
+#}
+# @rdname reduceByKey
+# @aliases reduceByKey,RDD,integer-method
 setMethod("reduceByKey",
           signature(x = "RDD", combineFunc = "ANY", numPartitions = "numeric"),
           function(x, combineFunc, numPartitions) {
@@ -333,27 +333,27 @@ setMethod("reduceByKey",
             lapplyPartition(shuffled, reduceVals)
           })

-#' Merge values by key locally
-#'
-#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
-#' and merges the values for each key using an associative reduce function, but return the
-#' results immediately to the driver as an R list.
-#'
-#' @param x The RDD to reduce by key. Should be an RDD where each element is
-#' list(K, V) or c(K, V).
-#' @param combineFunc The associative reduce function to use.
-#' @return A list of elements of type list(K, V') where V' is the merged value for each key
-#' @seealso reduceByKey
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
-#' rdd <- parallelize(sc, pairs)
-#' reduced <- reduceByKeyLocally(rdd, "+")
-#' reduced # list(list(1, 6), list(1.1, 3))
-#'}
-#' @rdname reduceByKeyLocally
-#' @aliases reduceByKeyLocally,RDD,integer-method
+# Merge values by key locally
+#
+# This function operates on RDDs where every element is of the form list(K, V) or c(K, V),
+# and merges the values for each key using an associative reduce function, but returns the
+# results immediately to the driver as an R list.
+#
+# @param x The RDD to reduce by key. Should be an RDD where each element is
+# list(K, V) or c(K, V).
+# @param combineFunc The associative reduce function to use.
+# @return A list of elements of type list(K, V') where V' is the merged value for each key
+# @seealso reduceByKey
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
+# rdd <- parallelize(sc, pairs)
+# reduced <- reduceByKeyLocally(rdd, "+")
+# reduced # list(list(1, 6), list(1.1, 3))
+#}
+# @rdname reduceByKeyLocally
+# @aliases reduceByKeyLocally,RDD,integer-method
 setMethod("reduceByKeyLocally",
           signature(x = "RDD", combineFunc = "ANY"),
           function(x, combineFunc) {
@@ -385,41 +385,41 @@ setMethod("reduceByKeyLocally",
             convertEnvsToList(merged[[1]], merged[[2]])
           })

-#' Combine values by key
-#'
-#' Generic function to combine the elements for each key using a custom set of
-#' aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)],
-#' for a "combined type" C. Note that V and C can be different -- for example, one
-#' might group an RDD of type (Int, Int) into an RDD of type (Int, Seq[Int]).
-
-#' Users provide three functions:
-#' \itemize{
-#'   \item createCombiner, which turns a V into a C (e.g., creates a one-element list)
-#'   \item mergeValue, to merge a V into a C (e.g., adds it to the end of a list) -
-#'   \item mergeCombiners, to combine two C's into a single one (e.g., concatentates
-#'    two lists).
-#' }
-#'
-#' @param x The RDD to combine. Should be an RDD where each element is
-#' list(K, V) or c(K, V).
-#' Combine values by key
-#'
-#' Generic function to combine the elements for each key using a custom set of
-#' aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)],
-#' for a "combined type" C. Note that V and C can be different -- for example, one
-#' might group an RDD of type (Int, Int) into an RDD of type (Int, Seq[Int]).
-
-#' Users provide three functions:
-#' \itemize{
-#' \item createCombiner, which turns a V into a C (e.g., creates a one-element list)
-#' \item mergeValue, to merge a V into a C (e.g., adds it to the end of a list) -
-#' \item mergeCombiners, to combine two C's into a single one (e.g., concatentates
-#' two lists).
-#' }
-#'
-#' @param x The RDD to combine. Should be an RDD where each element is
-#' list(K, V) or c(K, V).
-#' @param createCombiner Create a combiner (C) given a value (V)
-#' @param mergeValue Merge the given value (V) with an existing combiner (C)
-#' @param mergeCombiners Merge two combiners and return a new combiner
-#' @param numPartitions Number of partitions to create.
-#' @return An RDD where each element is list(K, C) where C is the combined type
-#'
-#' @seealso groupByKey, reduceByKey
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
-#' rdd <- parallelize(sc, pairs)
-#' parts <- combineByKey(rdd, function(x) { x }, "+", "+", 2L)
-#' combined <- collect(parts)
-#' combined[[1]] # Should be a list(1, 6)
-#'}
-#' @rdname combineByKey
-#' @aliases combineByKey,RDD,ANY,ANY,ANY,integer-method
+# Combine values by key
+#
+# Generic function to combine the elements for each key using a custom set of
+# aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)],
+# for a "combined type" C. Note that V and C can be different -- for example, one
+# might group an RDD of type (Int, Int) into an RDD of type (Int, Seq[Int]).
+
+# Users provide three functions:
+# \itemize{
+# \item createCombiner, which turns a V into a C (e.g., creates a one-element list)
+# \item mergeValue, to merge a V into a C (e.g., adds it to the end of a list)
+# \item mergeCombiners, to combine two C's into a single one (e.g., concatenates
+# two lists).
+# }
+#
+# @param x The RDD to combine. Should be an RDD where each element is
+# list(K, V) or c(K, V).
+# @param createCombiner Create a combiner (C) given a value (V)
+# @param mergeValue Merge the given value (V) with an existing combiner (C)
+# @param mergeCombiners Merge two combiners and return a new combiner
+# @param numPartitions Number of partitions to create.
+# @return An RDD where each element is list(K, C) where C is the combined type
+#
+# @seealso groupByKey, reduceByKey
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
+# rdd <- parallelize(sc, pairs)
+# parts <- combineByKey(rdd, function(x) { x }, "+", "+", 2L)
+# combined <- collect(parts)
+# combined[[1]] # Should be a list(1, 6)
+#}
+# @rdname combineByKey
+# @aliases combineByKey,RDD,ANY,ANY,ANY,integer-method
 setMethod("combineByKey",
           signature(x = "RDD", createCombiner = "ANY", mergeValue = "ANY",
                     mergeCombiners = "ANY", numPartitions = "numeric"),
@@ -451,36 +451,36 @@ setMethod("combineByKey",
   lapplyPartition(shuffled, mergeAfterShuffle)
 })
 
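To make the three functions concrete, here is a hedged plain-R sketch of how they compose for a single key whose values 2 and 4 arrive in different partitions. The function bodies below are illustrative choices that match the numeric example above, not SparkR internals:

createCombiner <- function(v) v             # V -> C: start a combiner from one value
mergeValue     <- function(c, v) c + v      # (C, V) -> C: fold a value in, within a partition
mergeCombiners <- function(c1, c2) c1 + c2  # (C, C) -> C: merge partials across partitions
c1 <- createCombiner(2)                     # partition 1 saw the value 2 under key 1
c2 <- createCombiner(4)                     # partition 2 saw the value 4 under key 1
mergeCombiners(c1, c2)                      # 6, matching combined[[1]] == list(1, 6)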
-#' Aggregate a pair RDD by each key.
-#'
-#' Aggregate the values of each key in an RDD, using given combine functions
-#' and a neutral "zero value". This function can return a different result type,
-#' U, than the type of the values in this RDD, V. Thus, we need one operation
-#' for merging a V into a U and one operation for merging two U's, The former
-#' operation is used for merging values within a partition, and the latter is
-#' used for merging values between partitions. To avoid memory allocation, both
-#' of these functions are allowed to modify and return their first argument
-#' instead of creating a new U.
-#'
-#' @param x An RDD.
-#' @param zeroValue A neutral "zero value".
-#' @param seqOp A function to aggregate the values of each key. It may return
-#' a different result type from the type of the values.
-#' @param combOp A function to aggregate results of seqOp.
-#' @return An RDD containing the aggregation result.
-#' @seealso foldByKey, combineByKey
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4)))
-#' zeroValue <- list(0, 0)
-#' seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) }
-#' combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) }
-#' aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)
-#' # list(list(1, list(3, 2)), list(2, list(7, 2)))
-#'}
-#' @rdname aggregateByKey
-#' @aliases aggregateByKey,RDD,ANY,ANY,ANY,integer-method
+# Aggregate a pair RDD by each key.
+#
+# Aggregate the values of each key in an RDD, using given combine functions
+# and a neutral "zero value". This function can return a different result type,
+# U, than the type of the values in this RDD, V. Thus, we need one operation
+# for merging a V into a U and one operation for merging two U's. The former
+# operation is used for merging values within a partition, and the latter is
+# used for merging values between partitions. To avoid memory allocation, both
+# of these functions are allowed to modify and return their first argument
+# instead of creating a new U.
+#
+# @param x An RDD.
+# @param zeroValue A neutral "zero value".
+# @param seqOp A function to aggregate the values of each key. It may return
+# a different result type from the type of the values.
+# @param combOp A function to aggregate results of seqOp.
+# @return An RDD containing the aggregation result.
+# @seealso foldByKey, combineByKey
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4)))
+# zeroValue <- list(0, 0)
+# seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) }
+# combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) }
+# aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)
+# # list(list(1, list(3, 2)), list(2, list(7, 2)))
+#}
+# @rdname aggregateByKey
+# @aliases aggregateByKey,RDD,ANY,ANY,ANY,integer-method
 setMethod("aggregateByKey",
           signature(x = "RDD", zeroValue = "ANY", seqOp = "ANY",
                     combOp = "ANY", numPartitions = "numeric"),
@@ -492,26 +492,26 @@ setMethod("aggregateByKey",
   combineByKey(x, createCombiner, seqOp, combOp, numPartitions)
 })
 
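The (sum, count) zero value in the example above is worth tracing once by hand. A hedged plain-R sketch (part1 and part2 are made-up names) of what seqOp and combOp accumulate for key 1 when its two values land in different partitions:

zeroValue <- list(0, 0)
seqOp  <- function(acc, v) list(acc[[1]] + v, acc[[2]] + 1)
combOp <- function(a, b) list(a[[1]] + b[[1]], a[[2]] + b[[2]])
part1 <- Reduce(seqOp, list(1), zeroValue)  # key 1's value in partition 1
part2 <- Reduce(seqOp, list(2), zeroValue)  # key 1's value in partition 2
combOp(part1, part2)                        # list(3, 2): sum 3 over 2 values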
-#' Fold a pair RDD by each key.
-#'
-#' Aggregate the values of each key in an RDD, using an associative function "func"
-#' and a neutral "zero value" which may be added to the result an arbitrary
-#' number of times, and must not change the result (e.g., 0 for addition, or
-#' 1 for multiplication.).
-#'
-#' @param x An RDD.
-#' @param zeroValue A neutral "zero value".
-#' @param func An associative function for folding values of each key.
-#' @return An RDD containing the aggregation result.
-#' @seealso aggregateByKey, combineByKey
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4)))
-#' foldByKey(rdd, 0, "+", 2L) # list(list(1, 3), list(2, 7))
-#'}
-#' @rdname foldByKey
-#' @aliases foldByKey,RDD,ANY,ANY,integer-method
+# Fold a pair RDD by each key.
+#
+# Aggregate the values of each key in an RDD, using an associative function "func"
+# and a neutral "zero value" which may be added to the result an arbitrary
+# number of times, and must not change the result (e.g., 0 for addition, or
+# 1 for multiplication).
+#
+# @param x An RDD.
+# @param zeroValue A neutral "zero value".
+# @param func An associative function for folding values of each key.
+# @return An RDD containing the aggregation result.
+# @seealso aggregateByKey, combineByKey
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4)))
+# foldByKey(rdd, 0, "+", 2L) # list(list(1, 3), list(2, 7))
+#}
+# @rdname foldByKey
+# @aliases foldByKey,RDD,ANY,ANY,integer-method
 setMethod("foldByKey",
           signature(x = "RDD", zeroValue = "ANY",
                     func = "ANY", numPartitions = "numeric"),
@@ -521,28 +521,28 @@ setMethod("foldByKey",
 
 ############ Binary Functions #############
 
-#' Join two RDDs
-#'
-#' @description
-#' \code{join} This function joins two RDDs where every element is of the form list(K, V).
-#' The key types of the two RDDs should be the same.
-#'
-#' @param x An RDD to be joined. Should be an RDD where each element is
-#' list(K, V).
-#' @param y An RDD to be joined. Should be an RDD where each element is
-#' list(K, V).
-#' @param numPartitions Number of partitions to create.
-#' @return a new RDD containing all pairs of elements with matching keys in
-#' two input RDDs.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
-#' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
-#' join(rdd1, rdd2, 2L) # list(list(1, list(1, 2)), list(1, list(1, 3))
-#'}
-#' @rdname join-methods
-#' @aliases join,RDD,RDD-method
+# Join two RDDs
+#
+# @description
+# \code{join}: This function joins two RDDs where every element is of the form list(K, V).
+# The key types of the two RDDs should be the same.
+#
+# @param x An RDD to be joined. Should be an RDD where each element is
+# list(K, V).
+# @param y An RDD to be joined. Should be an RDD where each element is
+# list(K, V).
+# @param numPartitions Number of partitions to create.
+# @return A new RDD containing all pairs of elements with matching keys in
+# two input RDDs.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
+# rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
+# join(rdd1, rdd2, 2L) # list(list(1, list(1, 2)), list(1, list(1, 3)))
+#}
+# @rdname join-methods
+# @aliases join,RDD,RDD-method
 setMethod("join",
           signature(x = "RDD", y = "RDD"),
           function(x, y, numPartitions) {
@@ -557,30 +557,30 @@ setMethod("join",
                              doJoin)
 })
 
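The inner-join semantics can be checked against a plain-R nested loop over local pair lists. This is a hedged sketch mirroring the rdd1/rdd2 example above (joined is an illustrative name), not how SparkR implements it:

rdd1 <- list(list(1, 1), list(2, 4))
rdd2 <- list(list(1, 2), list(1, 3))
joined <- list()
for (p in rdd1) {
  for (q in rdd2) {
    if (identical(p[[1]], q[[1]])) {
      joined[[length(joined) + 1]] <- list(p[[1]], list(p[[2]], q[[2]]))
    }
  }
}
joined  # list(list(1, list(1, 2)), list(1, list(1, 3)))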
-#' Left outer join two RDDs
-#'
-#' @description
-#' \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V).
-#' The key types of the two RDDs should be the same.
-#'
-#' @param x An RDD to be joined. Should be an RDD where each element is
-#' list(K, V).
-#' @param y An RDD to be joined. Should be an RDD where each element is
-#' list(K, V).
-#' @param numPartitions Number of partitions to create.
-#' @return For each element (k, v) in x, the resulting RDD will either contain
-#' all pairs (k, (v, w)) for (k, w) in rdd2, or the pair (k, (v, NULL))
-#' if no elements in rdd2 have key k.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
-#' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
-#' leftOuterJoin(rdd1, rdd2, 2L)
-#' # list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL)))
-#'}
-#' @rdname join-methods
-#' @aliases leftOuterJoin,RDD,RDD-method
+# Left outer join two RDDs
+#
+# @description
+# \code{leftOuterJoin}: This function left-outer-joins two RDDs where every element is of the form list(K, V).
+# The key types of the two RDDs should be the same.
+#
+# @param x An RDD to be joined. Should be an RDD where each element is
+# list(K, V).
+# @param y An RDD to be joined. Should be an RDD where each element is
+# list(K, V).
+# @param numPartitions Number of partitions to create.
+# @return For each element (k, v) in x, the resulting RDD will either contain
+# all pairs (k, (v, w)) for (k, w) in y, or the pair (k, (v, NULL))
+# if no elements in y have key k.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
+# rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
+# leftOuterJoin(rdd1, rdd2, 2L)
+# # list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL)))
+#}
+# @rdname join-methods
+# @aliases leftOuterJoin,RDD,RDD-method
 setMethod("leftOuterJoin",
           signature(x = "RDD", y = "RDD", numPartitions = "numeric"),
           function(x, y, numPartitions) {
@@ -594,30 +594,30 @@ setMethod("leftOuterJoin",
   joined <- flatMapValues(groupByKey(unionRDD(xTagged, yTagged), numPartitions),
                           doJoin)
 })
 
-#' Right outer join two RDDs
-#'
-#' @description
-#' \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V).
-#' The key types of the two RDDs should be the same.
-#'
-#' @param x An RDD to be joined. Should be an RDD where each element is
-#' list(K, V).
-#' @param y An RDD to be joined. Should be an RDD where each element is
-#' list(K, V).
-#' @param numPartitions Number of partitions to create.
-#' @return For each element (k, w) in y, the resulting RDD will either contain
-#' all pairs (k, (v, w)) for (k, v) in x, or the pair (k, (NULL, w))
-#' if no elements in x have key k.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3)))
-#' rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4)))
-#' rightOuterJoin(rdd1, rdd2, 2L)
-#' # list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4)))
-#'}
-#' @rdname join-methods
-#' @aliases rightOuterJoin,RDD,RDD-method
+# Right outer join two RDDs
+#
+# @description
+# \code{rightOuterJoin}: This function right-outer-joins two RDDs where every element is of the form list(K, V).
+# The key types of the two RDDs should be the same.
+#
+# @param x An RDD to be joined. Should be an RDD where each element is
+# list(K, V).
+# @param y An RDD to be joined. Should be an RDD where each element is
+# list(K, V).
+# @param numPartitions Number of partitions to create.
+# @return For each element (k, w) in y, the resulting RDD will either contain
+# all pairs (k, (v, w)) for (k, v) in x, or the pair (k, (NULL, w))
+# if no elements in x have key k.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3)))
+# rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4)))
+# rightOuterJoin(rdd1, rdd2, 2L)
+# # list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4)))
+#}
+# @rdname join-methods
+# @aliases rightOuterJoin,RDD,RDD-method
 setMethod("rightOuterJoin",
           signature(x = "RDD", y = "RDD", numPartitions = "numeric"),
           function(x, y, numPartitions) {
@@ -631,33 +631,33 @@ setMethod("rightOuterJoin",
   joined <- flatMapValues(groupByKey(unionRDD(xTagged, yTagged), numPartitions),
                           doJoin)
 })
 
-#' Full outer join two RDDs
-#'
-#' @description
-#' \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V).
-#' The key types of the two RDDs should be the same.
-#'
-#' @param x An RDD to be joined. Should be an RDD where each element is
-#' list(K, V).
-#' @param y An RDD to be joined. Should be an RDD where each element is
-#' list(K, V).
-#' @param numPartitions Number of partitions to create.
-#' @return For each element (k, v) in x and (k, w) in y, the resulting RDD
-#' will contain all pairs (k, (v, w)) for both (k, v) in x and
-#' (k, w) in y, or the pair (k, (NULL, w))/(k, (v, NULL)) if no elements
-#' in x/y have key k.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3), list(3, 3)))
-#' rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4)))
-#' fullOuterJoin(rdd1, rdd2, 2L) # list(list(1, list(2, 1)),
-#'                               #      list(1, list(3, 1)),
-#'                               #      list(2, list(NULL, 4)))
-#'                               #      list(3, list(3, NULL)),
-#'}
-#' @rdname join-methods
-#' @aliases fullOuterJoin,RDD,RDD-method
+# Full outer join two RDDs
+#
+# @description
+# \code{fullOuterJoin}: This function full-outer-joins two RDDs where every element is of the form list(K, V).
+# The key types of the two RDDs should be the same.
+#
+# @param x An RDD to be joined. Should be an RDD where each element is
+# list(K, V).
+# @param y An RDD to be joined. Should be an RDD where each element is
+# list(K, V).
+# @param numPartitions Number of partitions to create.
+# @return For each element (k, v) in x and (k, w) in y, the resulting RDD
+# will contain all pairs (k, (v, w)) for both (k, v) in x and
+# (k, w) in y, or the pair (k, (NULL, w))/(k, (v, NULL)) if no elements
+# in x/y have key k.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3), list(3, 3)))
+# rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4)))
+# fullOuterJoin(rdd1, rdd2, 2L) # list(list(1, list(2, 1)),
+#                               #      list(1, list(3, 1)),
+#                               #      list(2, list(NULL, 4)),
+#                               #      list(3, list(3, NULL)))
+#}
+# @rdname join-methods
+# @aliases fullOuterJoin,RDD,RDD-method
 setMethod("fullOuterJoin",
           signature(x = "RDD", y = "RDD", numPartitions = "numeric"),
           function(x, y, numPartitions) {
@@ -671,23 +671,23 @@ setMethod("fullOuterJoin",
   joined <- flatMapValues(groupByKey(unionRDD(xTagged, yTagged), numPartitions),
                           doJoin)
 })
 
-#' For each key k in several RDDs, return a resulting RDD that
-#' whose values are a list of values for the key in all RDDs.
-#'
-#' @param ... Several RDDs.
-#' @param numPartitions Number of partitions to create.
-#' @return a new RDD containing all pairs of elements with values in a list
-#' in all RDDs.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
-#' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
-#' cogroup(rdd1, rdd2, numPartitions = 2L)
-#' # list(list(1, list(1, list(2, 3))), list(2, list(list(4), list()))
-#'}
-#' @rdname cogroup
-#' @aliases cogroup,RDD-method
+# For each key k in several RDDs, return a resulting RDD whose
+# values are a list of values for the key in all RDDs.
+#
+# @param ... Several RDDs.
+# @param numPartitions Number of partitions to create.
+# @return A new RDD containing all pairs of elements with values in a list
+# in all RDDs.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
+# rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
+# cogroup(rdd1, rdd2, numPartitions = 2L)
+# # list(list(1, list(list(1), list(2, 3))), list(2, list(list(4), list())))
+#}
+# @rdname cogroup
+# @aliases cogroup,RDD-method
 setMethod("cogroup",
           "RDD",
           function(..., numPartitions) {
@@ -723,20 +723,20 @@ setMethod("cogroup",
                          group.func)
 })
 
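cogroup returns, for every key present in either input, one list of values per input RDD (an empty list when a key is absent from one side). A hedged plain-R sketch of the two-RDD case (allKeys, v1, v2, and cogrouped are made-up names):

rdd1 <- list(list(1, 1), list(2, 4))
rdd2 <- list(list(1, 2), list(1, 3))
allKeys <- unique(c(sapply(rdd1, `[[`, 1), sapply(rdd2, `[[`, 1)))
cogrouped <- lapply(allKeys, function(k) {
  v1 <- lapply(Filter(function(p) identical(p[[1]], k), rdd1), `[[`, 2)
  v2 <- lapply(Filter(function(p) identical(p[[1]], k), rdd2), `[[`, 2)
  list(k, list(v1, v2))        # one value list per input RDD
})
cogrouped[[2]]                 # list(2, list(list(4), list()))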
-#' Sort a (k, v) pair RDD by k.
-#'
-#' @param x A (k, v) pair RDD to be sorted.
-#' @param ascending A flag to indicate whether the sorting is ascending or descending.
-#' @param numPartitions Number of partitions to create.
-#' @return An RDD where all (k, v) pair elements are sorted.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(list(3, 1), list(2, 2), list(1, 3)))
-#' collect(sortByKey(rdd)) # list (list(1, 3), list(2, 2), list(3, 1))
-#'}
-#' @rdname sortByKey
-#' @aliases sortByKey,RDD,RDD-method
+# Sort a (k, v) pair RDD by k.
+#
+# @param x A (k, v) pair RDD to be sorted.
+# @param ascending A flag to indicate whether the sorting is ascending or descending.
+# @param numPartitions Number of partitions to create.
+# @return An RDD where all (k, v) pair elements are sorted.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(list(3, 1), list(2, 2), list(1, 3)))
+# collect(sortByKey(rdd)) # list(list(1, 3), list(2, 2), list(3, 1))
+#}
+# @rdname sortByKey
+# @aliases sortByKey,RDD,RDD-method
 setMethod("sortByKey",
           signature(x = "RDD"),
           function(x, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) {
@@ -785,25 +785,25 @@ setMethod("sortByKey",
   lapplyPartition(newRDD, partitionFunc)
 })
 
-#' Subtract a pair RDD with another pair RDD.
-#'
-#' Return an RDD with the pairs from x whose keys are not in other.
-#'
-#' @param x An RDD.
-#' @param other An RDD.
-#' @param numPartitions Number of the partitions in the result RDD.
-#' @return An RDD with the pairs from x whose keys are not in other.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4),
-#' list("b", 5), list("a", 2)))
-#' rdd2 <- parallelize(sc, list(list("a", 3), list("c", 1)))
-#' collect(subtractByKey(rdd1, rdd2))
-#' # list(list("b", 4), list("b", 5))
-#'}
-#' @rdname subtractByKey
-#' @aliases subtractByKey,RDD
+# Subtract a pair RDD with another pair RDD.
+#
+# Return an RDD with the pairs from x whose keys are not in other.
+#
+# @param x An RDD.
+# @param other An RDD.
+# @param numPartitions Number of the partitions in the result RDD.
+# @return An RDD with the pairs from x whose keys are not in other.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4),
+# list("b", 5), list("a", 2)))
+# rdd2 <- parallelize(sc, list(list("a", 3), list("c", 1)))
+# collect(subtractByKey(rdd1, rdd2))
+# # list(list("b", 4), list("b", 5))
+#}
+# @rdname subtractByKey
+# @aliases subtractByKey,RDD
 setMethod("subtractByKey",
           signature(x = "RDD", other = "RDD"),
           function(x, other, numPartitions = SparkR:::numPartitions(x)) {
@@ -819,41 +819,41 @@ setMethod("subtractByKey",
   function (v) { v[[1]] })
 })
 
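The contract of subtractByKey fits in a few lines of base R: keep exactly the pairs of x whose key does not occur in other. A hedged sketch (otherKeys is a made-up name), matching the example above:

x <- list(list("a", 1), list("b", 4), list("b", 5), list("a", 2))
other <- list(list("a", 3), list("c", 1))
otherKeys <- sapply(other, `[[`, 1)
Filter(function(p) !(p[[1]] %in% otherKeys), x)  # list(list("b", 4), list("b", 5))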
-#' Return a subset of this RDD sampled by key.
-#'
-#' @description
-#' \code{sampleByKey} Create a sample of this RDD using variable sampling rates
-#' for different keys as specified by fractions, a key to sampling rate map.
-#'
-#' @param x The RDD to sample elements by key, where each element is
-#' list(K, V) or c(K, V).
-#' @param withReplacement Sampling with replacement or not
-#' @param fraction The (rough) sample target fraction
-#' @param seed Randomness seed value
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:3000)
-#' pairs <- lapply(rdd, function(x) { if (x %% 3 == 0) list("a", x)
-#' else { if (x %% 3 == 1) list("b", x) else list("c", x) }})
-#' fractions <- list(a = 0.2, b = 0.1, c = 0.3)
-#' sample <- sampleByKey(pairs, FALSE, fractions, 1618L)
-#' 100 < length(lookup(sample, "a")) && 300 > length(lookup(sample, "a")) # TRUE
-#' 50 < length(lookup(sample, "b")) && 150 > length(lookup(sample, "b")) # TRUE
-#' 200 < length(lookup(sample, "c")) && 400 > length(lookup(sample, "c")) # TRUE
-#' lookup(sample, "a")[which.min(lookup(sample, "a"))] >= 0 # TRUE
-#' lookup(sample, "a")[which.max(lookup(sample, "a"))] <= 2000 # TRUE
-#' lookup(sample, "b")[which.min(lookup(sample, "b"))] >= 0 # TRUE
-#' lookup(sample, "b")[which.max(lookup(sample, "b"))] <= 2000 # TRUE
-#' lookup(sample, "c")[which.min(lookup(sample, "c"))] >= 0 # TRUE
-#' lookup(sample, "c")[which.max(lookup(sample, "c"))] <= 2000 # TRUE
-#' fractions <- list(a = 0.2, b = 0.1, c = 0.3, d = 0.4)
-#' sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # Key "d" will be ignored
-#' fractions <- list(a = 0.2, b = 0.1)
-#' sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # KeyError: "c"
-#'}
-#' @rdname sampleByKey
-#' @aliases sampleByKey,RDD-method
+# Return a subset of this RDD sampled by key.
+#
+# @description
+# \code{sampleByKey}: Create a sample of this RDD using variable sampling rates
+# for different keys as specified by fractions, a key to sampling rate map.
+#
+# @param x The RDD to sample elements by key, where each element is
+# list(K, V) or c(K, V).
+# @param withReplacement Sampling with replacement or not
+# @param fractions The (rough) target sampling rate for each key, as a key to rate map
+# @param seed Randomness seed value
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:3000)
+# pairs <- lapply(rdd, function(x) { if (x %% 3 == 0) list("a", x)
+# else { if (x %% 3 == 1) list("b", x) else list("c", x) }})
+# fractions <- list(a = 0.2, b = 0.1, c = 0.3)
+# sample <- sampleByKey(pairs, FALSE, fractions, 1618L)
+# 100 < length(lookup(sample, "a")) && 300 > length(lookup(sample, "a")) # TRUE
+# 50 < length(lookup(sample, "b")) && 150 > length(lookup(sample, "b")) # TRUE
+# 200 < length(lookup(sample, "c")) && 400 > length(lookup(sample, "c")) # TRUE
+# lookup(sample, "a")[which.min(lookup(sample, "a"))] >= 0 # TRUE
+# lookup(sample, "a")[which.max(lookup(sample, "a"))] <= 2000 # TRUE
+# lookup(sample, "b")[which.min(lookup(sample, "b"))] >= 0 # TRUE
+# lookup(sample, "b")[which.max(lookup(sample, "b"))] <= 2000 # TRUE
+# lookup(sample, "c")[which.min(lookup(sample, "c"))] >= 0 # TRUE
+# lookup(sample, "c")[which.max(lookup(sample, "c"))] <= 2000 # TRUE
+# fractions <- list(a = 0.2, b = 0.1, c = 0.3, d = 0.4)
+# sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # Key "d" will be ignored
+# fractions <- list(a = 0.2, b = 0.1)
+# sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # KeyError: "c"
+#}
+# @rdname sampleByKey
+# @aliases sampleByKey,RDD-method
 setMethod("sampleByKey",
           signature(x = "RDD", withReplacement = "logical",
                     fractions = "vector", seed = "integer"),