From be53d3006865182e68c2c69eb432188c7009c4fe Mon Sep 17 00:00:00 2001 From: somideshmukh Date: Wed, 4 Nov 2015 16:38:39 +0530 Subject: [PATCH 1/2] [SPARK-10946][SQL]JDBC - Use Statement.executeUpdate instead of PreparedStatement.executeUpdate for DDLs --- .../src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 2 +- .../apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 7887e559a3025..b8968a96f530b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -276,7 +276,7 @@ final class DataFrameWriter private[sql](df: DataFrame) { if (!tableExists) { val schema = JdbcUtils.schemaString(df, url) val sql = s"CREATE TABLE $table ($schema)" - conn.prepareStatement(sql).executeUpdate() + conn.createStatement.executeUpdate(sql) } } finally { conn.close() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index f89d55b20e212..a89f4f6ed9df8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -55,7 +55,7 @@ object JdbcUtils extends Logging { * Drops a table from the JDBC database. */ def dropTable(conn: Connection, table: String): Unit = { - conn.prepareStatement(s"DROP TABLE $table").executeUpdate() + conn.createStatement.executeUpdate(s"DROP TABLE $table") } /** From bfd43ab10d987e5dc23d008370a8cf7c4a782bd6 Mon Sep 17 00:00:00 2001 From: somideshmukh Date: Sat, 7 Nov 2015 14:07:22 +0530 Subject: [PATCH 2/2] [SPARK-11551][Example][Doc]Replace example code in ml-features.md using include_example --- docs/ml-features.md | 785 +----------------- .../spark/examples/ml/JavaBinarizer.java | 64 ++ .../spark/examples/ml/JavaBucketizer.java | 67 ++ .../org/apache/spark/examples/ml/JavaDCT.java | 61 ++ .../examples/ml/JavaElementwiseProduct.java | 65 ++ .../apache/spark/examples/ml/JavaNGram.java | 62 ++ .../spark/examples/ml/JavaOneHotEncoder.java | 70 ++ .../spark/examples/ml/JavaPCAExample.java | 62 ++ .../examples/ml/JavaPolynomialExpansion.java | 62 ++ .../spark/examples/ml/JavaRFormula.java | 64 ++ .../examples/ml/JavaStopWordsRemover.java | 59 ++ .../spark/examples/ml/JavaStringIndexer.java | 60 ++ .../spark/examples/ml/JavaTokenizer.java | 75 ++ .../examples/ml/JavaVectorAssembler.java | 61 ++ .../spark/examples/ml/JavaVectorSlicer.java | 68 ++ .../spark/examples/ml/BinarizerExample.scala | 44 + .../spark/examples/ml/BucketizerExample.scala | 44 + .../apache/spark/examples/ml/DCTExample.scala | 45 + .../ml/ElementWiseProductExample.scala | 47 ++ .../examples/ml/MinMaxScalerExample.scala | 43 + .../spark/examples/ml/NGramExample.scala | 44 + .../spark/examples/ml/NormalizerExample.scala | 43 + .../examples/ml/OneHotEncoderExample.scala | 51 ++ .../apache/spark/examples/ml/PCAExample.scala | 48 ++ .../ml/PolynomialExpansionExample.scala | 46 + .../spark/examples/ml/RFormulaExample.scala | 44 + .../examples/ml/StandardScalerExample.scala | 45 + .../examples/ml/StopWordsRemoverExample.scala | 45 + .../examples/ml/StringIndexerExample.scala | 40 + .../spark/examples/ml/TokenizerExample.scala | 49 ++ 
.../examples/ml/VectorAssemblerExample.scala | 42 + .../examples/ml/VectorIndexerExample.scala | 44 + 32 files changed, 1696 insertions(+), 753 deletions(-) create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizer.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizer.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaDCT.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProduct.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaNGram.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoder.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansion.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaRFormula.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemover.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexer.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizer.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssembler.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicer.java create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala diff --git a/docs/ml-features.md b/docs/ml-features.md index 142afac2f3f95..3b4105e331b0a 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -1,3 +1,4 @@ + --- layout: global title: Feature Extraction, Transformation, and Selection - SparkML @@ -170,25 +171,7 @@ Refer to the [Tokenizer Scala docs](api/scala/index.html#org.apache.spark.ml.fea and the [RegexTokenizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Tokenizer) 
for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.{Tokenizer, RegexTokenizer} - -val sentenceDataFrame = sqlContext.createDataFrame(Seq( - (0, "Hi I heard about Spark"), - (1, "I wish Java could use case classes"), - (2, "Logistic,regression,models,are,neat") -)).toDF("label", "sentence") -val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words") -val regexTokenizer = new RegexTokenizer() - .setInputCol("sentence") - .setOutputCol("words") - .setPattern("\\W") // alternatively .setPattern("\\w+").setGaps(false) - -val tokenized = tokenizer.transform(sentenceDataFrame) -tokenized.select("words", "label").take(3).foreach(println) -val regexTokenized = regexTokenizer.transform(sentenceDataFrame) -regexTokenized.select("words", "label").take(3).foreach(println) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/TokenizerExample.scala %}
@@ -197,44 +180,7 @@ Refer to the [Tokenizer Java docs](api/java/org/apache/spark/ml/feature/Tokenize and the [RegexTokenizer Java docs](api/java/org/apache/spark/ml/feature/RegexTokenizer.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.ml.feature.RegexTokenizer; -import org.apache.spark.ml.feature.Tokenizer; -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -JavaRDD jrdd = jsc.parallelize(Arrays.asList( - RowFactory.create(0, "Hi I heard about Spark"), - RowFactory.create(1, "I wish Java could use case classes"), - RowFactory.create(2, "Logistic,regression,models,are,neat") -)); -StructType schema = new StructType(new StructField[]{ - new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), - new StructField("sentence", DataTypes.StringType, false, Metadata.empty()) -}); -DataFrame sentenceDataFrame = sqlContext.createDataFrame(jrdd, schema); -Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words"); -DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame); -for (Row r : wordsDataFrame.select("words", "label").take(3)) { - java.util.List words = r.getList(0); - for (String word : words) System.out.print(word + " "); - System.out.println(); -} - -RegexTokenizer regexTokenizer = new RegexTokenizer() - .setInputCol("sentence") - .setOutputCol("words") - .setPattern("\\W"); // alternatively .setPattern("\\w+").setGaps(false); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaTokenizer.java %}
@@ -306,19 +252,7 @@ filtered out. Refer to the [StopWordsRemover Scala docs](api/scala/index.html#org.apache.spark.ml.feature.StopWordsRemover) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.StopWordsRemover - -val remover = new StopWordsRemover() - .setInputCol("raw") - .setOutputCol("filtered") -val dataSet = sqlContext.createDataFrame(Seq( - (0, Seq("I", "saw", "the", "red", "baloon")), - (1, Seq("Mary", "had", "a", "little", "lamb")) -)).toDF("id", "raw") - -remover.transform(dataSet).show() -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala %}
@@ -326,34 +260,7 @@ remover.transform(dataSet).show() Refer to the [StopWordsRemover Java docs](api/java/org/apache/spark/ml/feature/StopWordsRemover.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.ml.feature.StopWordsRemover; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -StopWordsRemover remover = new StopWordsRemover() - .setInputCol("raw") - .setOutputCol("filtered"); - -JavaRDD rdd = jsc.parallelize(Arrays.asList( - RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), - RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")) -)); -StructType schema = new StructType(new StructField[] { - new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) -}); -DataFrame dataset = jsql.createDataFrame(rdd, schema); - -remover.transform(dataset).show(); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaStopWordsRemover.java %}
@@ -388,19 +295,7 @@ An [n-gram](https://en.wikipedia.org/wiki/N-gram) is a sequence of $n$ tokens (t Refer to the [NGram Scala docs](api/scala/index.html#org.apache.spark.ml.feature.NGram) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.NGram - -val wordDataFrame = sqlContext.createDataFrame(Seq( - (0, Array("Hi", "I", "heard", "about", "Spark")), - (1, Array("I", "wish", "Java", "could", "use", "case", "classes")), - (2, Array("Logistic", "regression", "models", "are", "neat")) -)).toDF("label", "words") - -val ngram = new NGram().setInputCol("words").setOutputCol("ngrams") -val ngramDataFrame = ngram.transform(wordDataFrame) -ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(println) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/NGramExample.scala %}
@@ -408,38 +303,7 @@ ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(pri Refer to the [NGram Java docs](api/java/org/apache/spark/ml/feature/NGram.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.ml.feature.NGram; -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -JavaRDD jrdd = jsc.parallelize(Arrays.asList( - RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")), - RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")), - RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat")) -)); -StructType schema = new StructType(new StructField[]{ - new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), - new StructField("words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) -}); -DataFrame wordDataFrame = sqlContext.createDataFrame(jrdd, schema); -NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams"); -DataFrame ngramDataFrame = ngramTransformer.transform(wordDataFrame); -for (Row r : ngramDataFrame.select("ngrams", "label").take(3)) { - java.util.List ngrams = r.getList(0); - for (String ngram : ngrams) System.out.print(ngram + " --- "); - System.out.println(); -} -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaNGram.java %}
@@ -476,26 +340,7 @@ Binarization is the process of thresholding numerical features to binary (0/1) f Refer to the [Binarizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Binarizer) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.Binarizer -import org.apache.spark.sql.DataFrame - -val data = Array( - (0, 0.1), - (1, 0.8), - (2, 0.2) -) -val dataFrame: DataFrame = sqlContext.createDataFrame(data).toDF("label", "feature") - -val binarizer: Binarizer = new Binarizer() - .setInputCol("feature") - .setOutputCol("binarized_feature") - .setThreshold(0.5) - -val binarizedDataFrame = binarizer.transform(dataFrame) -val binarizedFeatures = binarizedDataFrame.select("binarized_feature") -binarizedFeatures.collect().foreach(println) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/BinarizerExample.scala %}
@@ -503,40 +348,7 @@ binarizedFeatures.collect().foreach(println) Refer to the [Binarizer Java docs](api/java/org/apache/spark/ml/feature/Binarizer.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.ml.feature.Binarizer; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -JavaRDD jrdd = jsc.parallelize(Arrays.asList( - RowFactory.create(0, 0.1), - RowFactory.create(1, 0.8), - RowFactory.create(2, 0.2) -)); -StructType schema = new StructType(new StructField[]{ - new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), - new StructField("feature", DataTypes.DoubleType, false, Metadata.empty()) -}); -DataFrame continuousDataFrame = jsql.createDataFrame(jrdd, schema); -Binarizer binarizer = new Binarizer() - .setInputCol("feature") - .setOutputCol("binarized_feature") - .setThreshold(0.5); -DataFrame binarizedDataFrame = binarizer.transform(continuousDataFrame); -DataFrame binarizedFeatures = binarizedDataFrame.select("binarized_feature"); -for (Row r : binarizedFeatures.collect()) { - Double binarized_value = r.getDouble(0); - System.out.println(binarized_value); -} -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaBinarizer.java %}
@@ -571,25 +383,7 @@ for binarized_feature, in binarizedFeatures.collect(): Refer to the [PCA Scala docs](api/scala/index.html#org.apache.spark.ml.feature.PCA) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.PCA -import org.apache.spark.mllib.linalg.Vectors - -val data = Array( - Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))), - Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0), - Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0) -) -val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") -val pca = new PCA() - .setInputCol("features") - .setOutputCol("pcaFeatures") - .setK(3) - .fit(df) -val pcaDF = pca.transform(df) -val result = pcaDF.select("pcaFeatures") -result.show() -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/PCAExample.scala %}
@@ -597,42 +391,7 @@ result.show() Refer to the [PCA Java docs](api/java/org/apache/spark/ml/feature/PCA.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.ml.feature.PCA -import org.apache.spark.ml.feature.PCAModel -import org.apache.spark.mllib.linalg.VectorUDT; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SQLContext; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -JavaSparkContext jsc = ... -SQLContext jsql = ... -JavaRDD data = jsc.parallelize(Arrays.asList( - RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})), - RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)), - RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)) -)); -StructType schema = new StructType(new StructField[] { - new StructField("features", new VectorUDT(), false, Metadata.empty()), -}); -DataFrame df = jsql.createDataFrame(data, schema); -PCAModel pca = new PCA() - .setInputCol("features") - .setOutputCol("pcaFeatures") - .setK(3) - .fit(df); -DataFrame result = pca.transform(df).select("pcaFeatures"); -result.show(); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaPCAExample.java %}
@@ -666,23 +425,7 @@ result.show(truncate=False) Refer to the [PolynomialExpansion Scala docs](api/scala/index.html#org.apache.spark.ml.feature.PolynomialExpansion) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.PolynomialExpansion -import org.apache.spark.mllib.linalg.Vectors - -val data = Array( - Vectors.dense(-2.0, 2.3), - Vectors.dense(0.0, 0.0), - Vectors.dense(0.6, -1.1) -) -val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") -val polynomialExpansion = new PolynomialExpansion() - .setInputCol("features") - .setOutputCol("polyFeatures") - .setDegree(3) -val polyDF = polynomialExpansion.transform(df) -polyDF.select("polyFeatures").take(3).foreach(println) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala %}
@@ -690,43 +433,7 @@ polyDF.select("polyFeatures").take(3).foreach(println) Refer to the [PolynomialExpansion Java docs](api/java/org/apache/spark/ml/feature/PolynomialExpansion.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.mllib.linalg.VectorUDT; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SQLContext; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -JavaSparkContext jsc = ... -SQLContext jsql = ... -PolynomialExpansion polyExpansion = new PolynomialExpansion() - .setInputCol("features") - .setOutputCol("polyFeatures") - .setDegree(3); -JavaRDD data = jsc.parallelize(Arrays.asList( - RowFactory.create(Vectors.dense(-2.0, 2.3)), - RowFactory.create(Vectors.dense(0.0, 0.0)), - RowFactory.create(Vectors.dense(0.6, -1.1)) -)); -StructType schema = new StructType(new StructField[] { - new StructField("features", new VectorUDT(), false, Metadata.empty()), -}); -DataFrame df = jsql.createDataFrame(data, schema); -DataFrame polyDF = polyExpansion.transform(df); -Row[] row = polyDF.select("polyFeatures").take(3); -for (Row r : row) { - System.out.println(r.get(0)); -} -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaPolynomialExpansion.java %}
@@ -771,22 +478,7 @@ $0$th DCT coefficient and _not_ the $N/2$th). Refer to the [DCT Scala docs](api/scala/index.html#org.apache.spark.ml.feature.DCT) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.DCT -import org.apache.spark.mllib.linalg.Vectors - -val data = Seq( - Vectors.dense(0.0, 1.0, -2.0, 3.0), - Vectors.dense(-1.0, 2.0, 4.0, -7.0), - Vectors.dense(14.0, -2.0, -5.0, 1.0)) -val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") -val dct = new DCT() - .setInputCol("features") - .setOutputCol("featuresDCT") - .setInverse(false) -val dctDf = dct.transform(df) -dctDf.select("featuresDCT").show(3) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/DCTExample.scala %}
@@ -794,39 +486,7 @@ dctDf.select("featuresDCT").show(3) Refer to the [DCT Java docs](api/java/org/apache/spark/ml/feature/DCT.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.ml.feature.DCT; -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.mllib.linalg.VectorUDT; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SQLContext; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -JavaRDD data = jsc.parallelize(Arrays.asList( - RowFactory.create(Vectors.dense(0.0, 1.0, -2.0, 3.0)), - RowFactory.create(Vectors.dense(-1.0, 2.0, 4.0, -7.0)), - RowFactory.create(Vectors.dense(14.0, -2.0, -5.0, 1.0)) -)); -StructType schema = new StructType(new StructField[] { - new StructField("features", new VectorUDT(), false, Metadata.empty()), -}); -DataFrame df = jsql.createDataFrame(data, schema); -DCT dct = new DCT() - .setInputCol("features") - .setOutputCol("featuresDCT") - .setInverse(false); -DataFrame dctDf = dct.transform(df); -dctDf.select("featuresDCT").show(3); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaDCT.java %}
@@ -881,18 +541,7 @@ index `2`. Refer to the [StringIndexer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.StringIndexer) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.StringIndexer - -val df = sqlContext.createDataFrame( - Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")) -).toDF("id", "category") -val indexer = new StringIndexer() - .setInputCol("category") - .setOutputCol("categoryIndex") -val indexed = indexer.fit(df).transform(df) -indexed.show() -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/StringIndexerExample.scala %}
@@ -900,37 +549,7 @@ indexed.show() Refer to the [StringIndexer Java docs](api/java/org/apache/spark/ml/feature/StringIndexer.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.ml.feature.StringIndexer; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; -import static org.apache.spark.sql.types.DataTypes.*; - -JavaRDD jrdd = jsc.parallelize(Arrays.asList( - RowFactory.create(0, "a"), - RowFactory.create(1, "b"), - RowFactory.create(2, "c"), - RowFactory.create(3, "a"), - RowFactory.create(4, "a"), - RowFactory.create(5, "c") -)); -StructType schema = new StructType(new StructField[] { - createStructField("id", DoubleType, false), - createStructField("category", StringType, false) -}); -DataFrame df = sqlContext.createDataFrame(jrdd, schema); -StringIndexer indexer = new StringIndexer() - .setInputCol("category") - .setOutputCol("categoryIndex"); -DataFrame indexed = indexer.fit(df).transform(df); -indexed.show(); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaStringIndexer.java %}
@@ -961,29 +580,7 @@ indexed.show() Refer to the [OneHotEncoder Scala docs](api/scala/index.html#org.apache.spark.ml.feature.OneHotEncoder) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer} - -val df = sqlContext.createDataFrame(Seq( - (0, "a"), - (1, "b"), - (2, "c"), - (3, "a"), - (4, "a"), - (5, "c") -)).toDF("id", "category") - -val indexer = new StringIndexer() - .setInputCol("category") - .setOutputCol("categoryIndex") - .fit(df) -val indexed = indexer.transform(df) - -val encoder = new OneHotEncoder().setInputCol("categoryIndex"). - setOutputCol("categoryVec") -val encoded = encoder.transform(indexed) -encoded.select("id", "categoryVec").foreach(println) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala %}
@@ -991,45 +588,7 @@ encoded.select("id", "categoryVec").foreach(println) Refer to the [OneHotEncoder Java docs](api/java/org/apache/spark/ml/feature/OneHotEncoder.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.ml.feature.OneHotEncoder; -import org.apache.spark.ml.feature.StringIndexer; -import org.apache.spark.ml.feature.StringIndexerModel; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -JavaRDD jrdd = jsc.parallelize(Arrays.asList( - RowFactory.create(0, "a"), - RowFactory.create(1, "b"), - RowFactory.create(2, "c"), - RowFactory.create(3, "a"), - RowFactory.create(4, "a"), - RowFactory.create(5, "c") -)); -StructType schema = new StructType(new StructField[]{ - new StructField("id", DataTypes.DoubleType, false, Metadata.empty()), - new StructField("category", DataTypes.StringType, false, Metadata.empty()) -}); -DataFrame df = sqlContext.createDataFrame(jrdd, schema); -StringIndexerModel indexer = new StringIndexer() - .setInputCol("category") - .setOutputCol("categoryIndex") - .fit(df); -DataFrame indexed = indexer.transform(df); - -OneHotEncoder encoder = new OneHotEncoder() - .setInputCol("categoryIndex") - .setOutputCol("categoryVec"); -DataFrame encoded = encoder.transform(indexed); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaOneHotEncoder.java %}
@@ -1078,23 +637,7 @@ In the example below, we read in a dataset of labeled points and then use `Vecto Refer to the [VectorIndexer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.VectorIndexer) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.VectorIndexer - -val data = sqlContext.read.format("libsvm") - .load("data/mllib/sample_libsvm_data.txt") -val indexer = new VectorIndexer() - .setInputCol("features") - .setOutputCol("indexed") - .setMaxCategories(10) -val indexerModel = indexer.fit(data) -val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet -println(s"Chose ${categoricalFeatures.size} categorical features: " + - categoricalFeatures.mkString(", ")) - -// Create new column "indexed" with categorical values transformed to indices -val indexedData = indexerModel.transform(data) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/VectorIndexerExample.scala %}
@@ -1160,22 +703,7 @@ The following example demonstrates how to load a dataset in libsvm format and th Refer to the [Normalizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Normalizer) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.Normalizer - -val dataFrame = sqlContext.read.format("libsvm") - .load("data/mllib/sample_libsvm_data.txt") - -// Normalize each Vector using $L^1$ norm. -val normalizer = new Normalizer() - .setInputCol("features") - .setOutputCol("normFeatures") - .setP(1.0) -val l1NormData = normalizer.transform(dataFrame) - -// Normalize each Vector using $L^\infty$ norm. -val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/NormalizerExample.scala %}
@@ -1244,23 +772,7 @@ The following example demonstrates how to load a dataset in libsvm format and th Refer to the [StandardScaler Scala docs](api/scala/index.html#org.apache.spark.ml.feature.StandardScaler) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.StandardScaler - -val dataFrame = sqlContext.read.format("libsvm") - .load("data/mllib/sample_libsvm_data.txt") -val scaler = new StandardScaler() - .setInputCol("features") - .setOutputCol("scaledFeatures") - .setWithStd(true) - .setWithMean(false) - -// Compute summary statistics by fitting the StandardScaler -val scalerModel = scaler.fit(dataFrame) - -// Normalize each feature to have unit standard deviation. -val scaledData = scalerModel.transform(dataFrame) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/StandardScalerExample.scala %}
@@ -1337,21 +849,7 @@ Refer to the [MinMaxScaler Scala docs](api/scala/index.html#org.apache.spark.ml. and the [MinMaxScalerModel Scala docs](api/scala/index.html#org.apache.spark.ml.feature.MinMaxScalerModel) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.MinMaxScaler - -val dataFrame = sqlContext.read.format("libsvm") - .load("data/mllib/sample_libsvm_data.txt") -val scaler = new MinMaxScaler() - .setInputCol("features") - .setOutputCol("scaledFeatures") - -// Compute summary statistics and generate MinMaxScalerModel -val scalerModel = scaler.fit(dataFrame) - -// rescale each feature to range [min, max]. -val scaledData = scalerModel.transform(dataFrame) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala %}
@@ -1401,23 +899,7 @@ The following example demonstrates how to bucketize a column of `Double`s into a Refer to the [Bucketizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Bucketizer) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.Bucketizer -import org.apache.spark.sql.DataFrame - -val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity) - -val data = Array(-0.5, -0.3, 0.0, 0.2) -val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") - -val bucketizer = new Bucketizer() - .setInputCol("features") - .setOutputCol("bucketedFeatures") - .setSplits(splits) - -// Transform original data into its bucket index. -val bucketedData = bucketizer.transform(dataFrame) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/BucketizerExample.scala %}
@@ -1425,38 +907,7 @@ val bucketedData = bucketizer.transform(dataFrame) Refer to the [Bucketizer Java docs](api/java/org/apache/spark/ml/feature/Bucketizer.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY}; - -JavaRDD data = jsc.parallelize(Arrays.asList( - RowFactory.create(-0.5), - RowFactory.create(-0.3), - RowFactory.create(0.0), - RowFactory.create(0.2) -)); -StructType schema = new StructType(new StructField[] { - new StructField("features", DataTypes.DoubleType, false, Metadata.empty()) -}); -DataFrame dataFrame = jsql.createDataFrame(data, schema); - -Bucketizer bucketizer = new Bucketizer() - .setInputCol("features") - .setOutputCol("bucketedFeatures") - .setSplits(splits); - -// Transform original data into its bucket index. -DataFrame bucketedData = bucketizer.transform(dataFrame); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaBucketizer.java %}
@@ -1508,25 +959,7 @@ This example below demonstrates how to transform vectors using a transforming ve Refer to the [ElementwiseProduct Scala docs](api/scala/index.html#org.apache.spark.ml.feature.ElementwiseProduct) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.ElementwiseProduct -import org.apache.spark.mllib.linalg.Vectors - -// Create some vector data; also works for sparse vectors -val dataFrame = sqlContext.createDataFrame(Seq( - ("a", Vectors.dense(1.0, 2.0, 3.0)), - ("b", Vectors.dense(4.0, 5.0, 6.0)))).toDF("id", "vector") - -val transformingVector = Vectors.dense(0.0, 1.0, 2.0) -val transformer = new ElementwiseProduct() - .setScalingVec(transformingVector) - .setInputCol("vector") - .setOutputCol("transformedVector") - -// Batch transform the vectors to create new column: -transformer.transform(dataFrame).show() - -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala %}
@@ -1534,41 +967,7 @@ transformer.transform(dataFrame).show() Refer to the [ElementwiseProduct Java docs](api/java/org/apache/spark/ml/feature/ElementwiseProduct.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.ml.feature.ElementwiseProduct; -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SQLContext; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -// Create some vector data; also works for sparse vectors -JavaRDD jrdd = jsc.parallelize(Arrays.asList( - RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)), - RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0)) -)); -List fields = new ArrayList(2); -fields.add(DataTypes.createStructField("id", DataTypes.StringType, false)); -fields.add(DataTypes.createStructField("vector", DataTypes.StringType, false)); -StructType schema = DataTypes.createStructType(fields); -DataFrame dataFrame = sqlContext.createDataFrame(jrdd, schema); -Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0); -ElementwiseProduct transformer = new ElementwiseProduct() - .setScalingVec(transformingVector) - .setInputCol("vector") - .setOutputCol("transformedVector"); -// Batch transform the vectors to create new column: -transformer.transform(dataFrame).show(); - -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaElementwiseProduct.java %}
@@ -1632,19 +1031,7 @@ output column to `features`, after transformation we should get the following Da Refer to the [VectorAssembler Scala docs](api/scala/index.html#org.apache.spark.ml.feature.VectorAssembler) for more details on the API. -{% highlight scala %} -import org.apache.spark.mllib.linalg.Vectors -import org.apache.spark.ml.feature.VectorAssembler - -val dataset = sqlContext.createDataFrame( - Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0)) -).toDF("id", "hour", "mobile", "userFeatures", "clicked") -val assembler = new VectorAssembler() - .setInputCols(Array("hour", "mobile", "userFeatures")) - .setOutputCol("features") -val output = assembler.transform(dataset) -println(output.select("features", "clicked").first()) -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala %}
@@ -1652,36 +1039,7 @@ println(output.select("features", "clicked").first()) Refer to the [VectorAssembler Java docs](api/java/org/apache/spark/ml/feature/VectorAssembler.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.mllib.linalg.VectorUDT; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.*; -import static org.apache.spark.sql.types.DataTypes.*; - -StructType schema = createStructType(new StructField[] { - createStructField("id", IntegerType, false), - createStructField("hour", IntegerType, false), - createStructField("mobile", DoubleType, false), - createStructField("userFeatures", new VectorUDT(), false), - createStructField("clicked", DoubleType, false) -}); -Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0); -JavaRDD rdd = jsc.parallelize(Arrays.asList(row)); -DataFrame dataset = sqlContext.createDataFrame(rdd, schema); - -VectorAssembler assembler = new VectorAssembler() - .setInputCols(new String[] {"hour", "mobile", "userFeatures"}) - .setOutputCol("features"); - -DataFrame output = assembler.transform(dataset); -System.out.println(output.select("features", "clicked").first()); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaVectorAssembler.java %}
@@ -1800,41 +1158,7 @@ println(output.select("userFeatures", "features").first()) Refer to the [VectorSlicer Java docs](api/java/org/apache/spark/ml/feature/VectorSlicer.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.*; -import static org.apache.spark.sql.types.DataTypes.*; - -Attribute[] attrs = new Attribute[]{ - NumericAttribute.defaultAttr().withName("f1"), - NumericAttribute.defaultAttr().withName("f2"), - NumericAttribute.defaultAttr().withName("f3") -}; -AttributeGroup group = new AttributeGroup("userFeatures", attrs); - -JavaRDD jrdd = jsc.parallelize(Lists.newArrayList( - RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})), - RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0)) -)); - -DataFrame dataset = jsql.createDataFrame(jrdd, (new StructType()).add(group.toStructField())); - -VectorSlicer vectorSlicer = new VectorSlicer() - .setInputCol("userFeatures").setOutputCol("features"); - -vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"}); -// or slicer.setIndices(new int[]{1, 2}), or slicer.setNames(new String[]{"f2", "f3"}) - -DataFrame output = vectorSlicer.transform(dataset); - -System.out.println(output.select("userFeatures", "features").first()); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaVectorSlicer.java %}
@@ -1871,21 +1195,7 @@ id | country | hour | clicked | features | label Refer to the [RFormula Scala docs](api/scala/index.html#org.apache.spark.ml.feature.RFormula) for more details on the API. -{% highlight scala %} -import org.apache.spark.ml.feature.RFormula - -val dataset = sqlContext.createDataFrame(Seq( - (7, "US", 18, 1.0), - (8, "CA", 12, 0.0), - (9, "NZ", 15, 0.0) -)).toDF("id", "country", "hour", "clicked") -val formula = new RFormula() - .setFormula("clicked ~ country + hour") - .setFeaturesCol("features") - .setLabelCol("label") -val output = formula.fit(dataset).transform(dataset) -output.select("features", "label").show() -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/ml/RFormulaExample.scala %}
@@ -1893,38 +1203,7 @@ output.select("features", "label").show() Refer to the [RFormula Java docs](api/java/org/apache/spark/ml/feature/RFormula.html) for more details on the API. -{% highlight java %} -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.ml.feature.RFormula; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.types.*; -import static org.apache.spark.sql.types.DataTypes.*; - -StructType schema = createStructType(new StructField[] { - createStructField("id", IntegerType, false), - createStructField("country", StringType, false), - createStructField("hour", IntegerType, false), - createStructField("clicked", DoubleType, false) -}); -JavaRDD rdd = jsc.parallelize(Arrays.asList( - RowFactory.create(7, "US", 18, 1.0), - RowFactory.create(8, "CA", 12, 0.0), - RowFactory.create(9, "NZ", 15, 0.0) -)); -DataFrame dataset = sqlContext.createDataFrame(rdd, schema); - -RFormula formula = new RFormula() - .setFormula("clicked ~ country + hour") - .setFeaturesCol("features") - .setLabelCol("label"); - -DataFrame output = formula.fit(dataset).transform(dataset); -output.select("features", "label").show(); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/ml/JavaRFormula.java %}
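The `{% include_example %}` tags above pull their code from the example files added below. In the Spark docs build, the `include_example` Jekyll plugin renders only the region between `// $example on$` and `// $example off$` comment markers in the referenced file; that plugin behavior is assumed here rather than shown by this patch, so treat the following as a sketch of the shape each new example file is expected to have, not as part of the diff. The snippet body is taken from the Tokenizer code previously inlined in ml-features.md; the object skeleton and context setup are illustrative only.

package org.apache.spark.examples.ml

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
// $example on$
import org.apache.spark.ml.feature.{RegexTokenizer, Tokenizer}
// $example off$

object TokenizerExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TokenizerExample")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // Only the region between the $example on$ / $example off$ markers is
    // rendered into ml-features.md by the include_example tag.
    // $example on$
    val sentenceDataFrame = sqlContext.createDataFrame(Seq(
      (0, "Hi I heard about Spark"),
      (1, "I wish Java could use case classes"),
      (2, "Logistic,regression,models,are,neat")
    )).toDF("label", "sentence")

    val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
    val regexTokenizer = new RegexTokenizer()
      .setInputCol("sentence")
      .setOutputCol("words")
      .setPattern("\\W") // alternatively .setPattern("\\w+").setGaps(false)

    val tokenized = tokenizer.transform(sentenceDataFrame)
    tokenized.select("words", "label").take(3).foreach(println)
    val regexTokenized = regexTokenizer.transform(sentenceDataFrame)
    regexTokenized.select("words", "label").take(3).foreach(println)
    // $example off$

    sc.stop()
  }
}

The same marker pattern would apply to each of the other Scala and Java example files created below.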
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizer.java new file mode 100644 index 0000000000000..893d3e3c05b0d --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizer.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.Binarizer; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + + +public class JavaBinarizer { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaBinarizer"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + JavaRDD jrdd = jsc.parallelize(Arrays.asList( + RowFactory.create(0, 0.1), + RowFactory.create(1, 0.8), + RowFactory.create(2, 0.2) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("feature", DataTypes.DoubleType, false, Metadata.empty()) + }); + DataFrame continuousDataFrame = jsql.createDataFrame(jrdd, schema); + Binarizer binarizer = new Binarizer() + .setInputCol("feature") + .setOutputCol("binarized_feature") + .setThreshold(0.5); + DataFrame binarizedDataFrame = binarizer.transform(continuousDataFrame); + DataFrame binarizedFeatures = binarizedDataFrame.select("binarized_feature"); + for (Row r : binarizedFeatures.collect()) { + Double binarized_value = r.getDouble(0); + System.out.println(binarized_value); + } + } +} \ No newline at end of file diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizer.java new file mode 100644 index 0000000000000..554d2e8691604 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizer.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.Bucketizer; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaBucketizer { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaBucketizer"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY}; + + JavaRDD data = jsc.parallelize(Arrays.asList( + RowFactory.create(-0.5), + RowFactory.create(-0.3), + RowFactory.create(0.0), + RowFactory.create(0.2) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("features", DataTypes.DoubleType, false, Metadata.empty()) + }); + DataFrame dataFrame = jsql.createDataFrame(data, schema); + + Bucketizer bucketizer = new Bucketizer() + .setInputCol("features") + .setOutputCol("bucketedFeatures") + .setSplits(splits); + + // Transform original data into its bucket index. + DataFrame bucketedData = bucketizer.transform(dataFrame); + + } +} + + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCT.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCT.java new file mode 100644 index 0000000000000..b990930734010 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCT.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.DCT; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + + +public class JavaDCT { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaDCT"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + JavaRDD data = jsc.parallelize(Arrays.asList( + RowFactory.create(Vectors.dense(0.0, 1.0, -2.0, 3.0)), + RowFactory.create(Vectors.dense(-1.0, 2.0, 4.0, -7.0)), + RowFactory.create(Vectors.dense(14.0, -2.0, -5.0, 1.0)) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("features", new VectorUDT(), false, Metadata.empty()), + }); + DataFrame df = jsql.createDataFrame(data, schema); + DCT dct = new DCT() + .setInputCol("features") + .setOutputCol("featuresDCT") + .setInverse(false); + DataFrame dctDf = dct.transform(df); + dctDf.select("featuresDCT").show(3); + } +} + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProduct.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProduct.java new file mode 100644 index 0000000000000..ed99fac974701 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProduct.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.ElementwiseProduct; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaElementwiseProduct { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaElementwiseProduct"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext sqlContext = new SQLContext(jsc); + + // Create some vector data; also works for sparse vectors + JavaRDD jrdd = jsc.parallelize(Arrays.asList( + RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)), + RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0)) + )); + List fields = new ArrayList(2); + fields.add(DataTypes.createStructField("id", DataTypes.StringType, false)); + fields.add(DataTypes.createStructField("vector", DataTypes.StringType, false)); + StructType schema = DataTypes.createStructType(fields); + DataFrame dataFrame = sqlContext.createDataFrame(jrdd, schema); + Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0); + ElementwiseProduct transformer = new ElementwiseProduct() + .setScalingVec(transformingVector) + .setInputCol("vector") + .setOutputCol("transformedVector"); + // Batch transform the vectors to create new column: + transformer.transform(dataFrame).show(); + + + } +} \ No newline at end of file diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGram.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGram.java new file mode 100644 index 0000000000000..b40d2c233d853 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGram.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SQLContext; + +import java.util.Arrays; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.NGram; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaNGram { + + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaNGram"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext sqlContext = new SQLContext(jsc); + + JavaRDD jrdd = jsc.parallelize(Arrays.asList( + RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")), + RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")), + RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat")) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) + }); + DataFrame wordDataFrame = sqlContext.createDataFrame(jrdd, schema); + NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams"); + DataFrame ngramDataFrame = ngramTransformer.transform(wordDataFrame); + for (Row r : ngramDataFrame.select("ngrams", "label").take(3)) { + java.util.List ngrams = r.getList(0); + for (String ngram : ngrams) System.out.print(ngram + " --- "); + System.out.println(); + } + } +} \ No newline at end of file diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoder.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoder.java new file mode 100644 index 0000000000000..a415619762b55 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoder.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.util.Arrays;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.feature.OneHotEncoder;
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.ml.feature.StringIndexerModel;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+public class JavaOneHotEncoder {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaOneHotEncoder");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, "a"),
+      RowFactory.create(1, "b"),
+      RowFactory.create(2, "c"),
+      RowFactory.create(3, "a"),
+      RowFactory.create(4, "a"),
+      RowFactory.create(5, "c")
+    ));
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("category", DataTypes.StringType, false, Metadata.empty())
+    });
+    DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+    StringIndexerModel indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+      .fit(df);
+    DataFrame indexed = indexer.transform(df);
+
+    OneHotEncoder encoder = new OneHotEncoder()
+      .setInputCol("categoryIndex")
+      .setOutputCol("categoryVec");
+    DataFrame encoded = encoder.transform(indexed);
+    encoded.select("id", "categoryVec").show();
+  }
+}
+
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
new file mode 100644
index 0000000000000..e2d61d65a3fa8
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.PCA; +import org.apache.spark.ml.feature.PCAModel; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaPCAExample { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaPCAExample"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + JavaRDD data = jsc.parallelize(Arrays.asList( + RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})), + RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)), + RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("features", new VectorUDT(), false, Metadata.empty()), + }); + DataFrame df = jsql.createDataFrame(data, schema); + PCAModel pca = new PCA() + .setInputCol("features") + .setOutputCol("pcaFeatures") + .setK(3) + .fit(df); + DataFrame result = pca.transform(df).select("pcaFeatures"); + result.show(); + } +} + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansion.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansion.java new file mode 100644 index 0000000000000..38eaa740c9138 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansion.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.PolynomialExpansion; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaPolynomialExpansion { + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaPolynomialExpansion"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + + PolynomialExpansion polyExpansion = new PolynomialExpansion() + .setInputCol("features") + .setOutputCol("polyFeatures") + .setDegree(3); + JavaRDD data = jsc.parallelize(Arrays.asList( + RowFactory.create(Vectors.dense(-2.0, 2.3)), + RowFactory.create(Vectors.dense(0.0, 0.0)), + RowFactory.create(Vectors.dense(0.6, -1.1)) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("features", new VectorUDT(), false, Metadata.empty()), + }); + DataFrame df = jsql.createDataFrame(data, schema); + DataFrame polyDF = polyExpansion.transform(df); + Row[] row = polyDF.select("polyFeatures").take(3); + for (Row r : row) { + System.out.println(r.get(0)); + } + } +} \ No newline at end of file diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormula.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormula.java new file mode 100644 index 0000000000000..ae22124e0e9e1 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormula.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.RFormula; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.*; +import static org.apache.spark.sql.types.DataTypes.*; + +public class JavaRFormula { + + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaRFormula"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext sqlContext = new SQLContext(jsc); + + StructType schema = createStructType(new StructField[]{ + createStructField("id", IntegerType, false), + createStructField("country", StringType, false), + createStructField("hour", IntegerType, false), + createStructField("clicked", DoubleType, false) + }); + JavaRDD rdd = jsc.parallelize(Arrays.asList( + RowFactory.create(7, "US", 18, 1.0), + RowFactory.create(8, "CA", 12, 0.0), + RowFactory.create(9, "NZ", 15, 0.0) + )); + DataFrame dataset = sqlContext.createDataFrame(rdd, schema); + + RFormula formula = new RFormula() + .setFormula("clicked ~ country + hour") + .setFeaturesCol("features") + .setLabelCol("label"); + + DataFrame output = formula.fit(dataset).transform(dataset); + output.select("features", "label").show(); + + } +} + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemover.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemover.java new file mode 100644 index 0000000000000..c669c9a812de4 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemover.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.StopWordsRemover; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaStopWordsRemover { + + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaStopWordsRemover"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + StopWordsRemover remover = new StopWordsRemover() + .setInputCol("raw") + .setOutputCol("filtered"); + + JavaRDD rdd = jsc.parallelize(Arrays.asList( + RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), + RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) + }); + DataFrame dataset = jsql.createDataFrame(rdd, schema); + + remover.transform(dataset).show(); + } +} diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexer.java new file mode 100644 index 0000000000000..7e3d67c1c3c7e --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexer.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.util.Arrays;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import static org.apache.spark.sql.types.DataTypes.*;
+
+public class JavaStringIndexer {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaStringIndexer");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, "a"),
+      RowFactory.create(1, "b"),
+      RowFactory.create(2, "c"),
+      RowFactory.create(3, "a"),
+      RowFactory.create(4, "a"),
+      RowFactory.create(5, "c")
+    ));
+    StructType schema = new StructType(new StructField[]{
+      createStructField("id", IntegerType, false),
+      createStructField("category", StringType, false)
+    });
+    DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+    StringIndexer indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex");
+    DataFrame indexed = indexer.fit(df).transform(df);
+    indexed.show();
+  }
+}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizer.java
new file mode 100644
index 0000000000000..9299c91e78cc2
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizer.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.feature.RegexTokenizer;
+import org.apache.spark.ml.feature.Tokenizer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+public class JavaTokenizer {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaTokenizer");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, "Hi I heard about Spark"),
+      RowFactory.create(1, "I wish Java could use case classes"),
+      RowFactory.create(2, "Logistic,regression,models,are,neat")
+    ));
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
+    });
+    DataFrame sentenceDataFrame = sqlContext.createDataFrame(jrdd, schema);
+    Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words");
+    DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame);
+    for (Row r : wordsDataFrame.select("words", "label").take(3)) {
+      List<String> words = r.getList(0);
+      for (String word : words) System.out.print(word + " ");
+      System.out.println();
+    }
+
+    RegexTokenizer regexTokenizer = new RegexTokenizer()
+      .setInputCol("sentence")
+      .setOutputCol("words")
+      .setPattern("\\W");  // alternatively .setPattern("\\w+").setGaps(false);
+  }
+}
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssembler.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssembler.java
new file mode 100644
index 0000000000000..ae8f5fe5cbc91
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssembler.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.VectorAssembler; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.*; +import static org.apache.spark.sql.types.DataTypes.*; + +public class JavaVectorAssembler { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaVectorAssembler"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext sqlContext = new SQLContext(jsc); + + StructType schema = createStructType(new StructField[]{ + createStructField("id", IntegerType, false), + createStructField("hour", IntegerType, false), + createStructField("mobile", DoubleType, false), + createStructField("userFeatures", new VectorUDT(), false), + createStructField("clicked", DoubleType, false) + }); + Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0); + JavaRDD rdd = jsc.parallelize(Arrays.asList(row)); + DataFrame dataset = sqlContext.createDataFrame(rdd, schema); + + VectorAssembler assembler = new VectorAssembler() + .setInputCols(new String[]{"hour", "mobile", "userFeatures"}) + .setOutputCol("features"); + + DataFrame output = assembler.transform(dataset); + System.out.println(output.select("features", "clicked").first()); + } +} + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicer.java new file mode 100644 index 0000000000000..b6a09804fa14f --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicer.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.examples.ml;
+
+import com.google.common.collect.Lists;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.attribute.Attribute;
+import org.apache.spark.ml.attribute.AttributeGroup;
+import org.apache.spark.ml.attribute.NumericAttribute;
+import org.apache.spark.ml.feature.VectorSlicer;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.*;
+
+public class JavaVectorSlicer {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaVectorSlicer");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    Attribute[] attrs = new Attribute[]{
+      NumericAttribute.defaultAttr().withName("f1"),
+      NumericAttribute.defaultAttr().withName("f2"),
+      NumericAttribute.defaultAttr().withName("f3")
+    };
+    AttributeGroup group = new AttributeGroup("userFeatures", attrs);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+      RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
+      RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
+    ));
+
+    DataFrame dataset = jsql.createDataFrame(jrdd, (new StructType()).add(group.toStructField()));
+
+    VectorSlicer vectorSlicer = new VectorSlicer()
+      .setInputCol("userFeatures").setOutputCol("features");
+
+    vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"});
+    // or vectorSlicer.setIndices(new int[]{1, 2}), or vectorSlicer.setNames(new String[]{"f2", "f3"})
+
+    DataFrame output = vectorSlicer.transform(dataset);
+
+    System.out.println(output.select("userFeatures", "features").first());
+  }
+}
+
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala
new file mode 100644
index 0000000000000..c5d436126711e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.examples.ml + +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.Binarizer +import org.apache.spark.sql.{SQLContext, DataFrame} + +object BinarizerExample { + val conf = new SparkConf().setAppName("BinarizerExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + val data = Array( + (0, 0.1), + (1, 0.8), + (2, 0.2) + ) + val dataFrame: DataFrame = sqlContext.createDataFrame(data).toDF("label", "feature") + + val binarizer: Binarizer = new Binarizer() + .setInputCol("feature") + .setOutputCol("binarized_feature") + .setThreshold(0.5) + + val binarizedDataFrame = binarizer.transform(dataFrame) + val binarizedFeatures = binarizedDataFrame.select("binarized_feature") + binarizedFeatures.collect().foreach(println) +} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala new file mode 100644 index 0000000000000..9ebade20ce56f --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.Bucketizer +import org.apache.spark.sql.{SQLContext, DataFrame} + +object BucketizerExample { + + val conf = new SparkConf().setAppName("BucketizerExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity) + + val data = Array(-0.5, -0.3, 0.0, 0.2) + val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") + + val bucketizer = new Bucketizer() + .setInputCol("features") + .setOutputCol("bucketedFeatures") + .setSplits(splits) + + // Transform original data into its bucket index. + val bucketedData = bucketizer.transform(dataFrame) + +} + diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala new file mode 100644 index 0000000000000..959df02468006 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.DCT +import org.apache.spark.mllib.linalg.Vectors + + +object DCTExample { + + val conf = new SparkConf().setAppName("DCTExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + val data = Seq( + Vectors.dense(0.0, 1.0, -2.0, 3.0), + Vectors.dense(-1.0, 2.0, 4.0, -7.0), + Vectors.dense(14.0, -2.0, -5.0, 1.0)) + val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") + val dct = new DCT() + .setInputCol("features") + .setOutputCol("featuresDCT") + .setInverse(false) + val dctDf = dct.transform(df) + dctDf.select("featuresDCT").show(3) + +} + diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala new file mode 100644 index 0000000000000..4bcd7129e26f9 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.ml.feature.ElementwiseProduct
+import org.apache.spark.mllib.linalg.Vectors
+
+object ElementWiseProductExample {
+
+  val conf = new SparkConf().setAppName("ElementWiseProductExample")
+  val sc = new SparkContext(conf)
+  val sqlContext = new SQLContext(sc)
+
+  // Create some vector data; also works for sparse vectors
+  val dataFrame = sqlContext.createDataFrame(Seq(
+    ("a", Vectors.dense(1.0, 2.0, 3.0)),
+    ("b", Vectors.dense(4.0, 5.0, 6.0)))).toDF("id", "vector")
+
+  val transformingVector = Vectors.dense(0.0, 1.0, 2.0)
+  val transformer = new ElementwiseProduct()
+    .setScalingVec(transformingVector)
+    .setInputCol("vector")
+    .setOutputCol("transformedVector")
+
+  // Batch transform the vectors to create new column:
+  transformer.transform(dataFrame).show()
+
+}
+
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
new file mode 100644
index 0000000000000..d782340060d56
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.ml.feature.MinMaxScaler
+
+object MinMaxScalerExample {
+
+  val conf = new SparkConf().setAppName("MinMaxScalerExample")
+  val sc = new SparkContext(conf)
+  val sqlContext = new SQLContext(sc)
+
+  val dataFrame = sqlContext.read.format("libsvm")
+    .load("data/mllib/sample_libsvm_data.txt")
+  val scaler = new MinMaxScaler()
+    .setInputCol("features")
+    .setOutputCol("scaledFeatures")
+
+  // Compute summary statistics and generate MinMaxScalerModel
+  val scalerModel = scaler.fit(dataFrame)
+
+  // Rescale each feature to range [min, max].
+  val scaledData = scalerModel.transform(dataFrame)
+
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
new file mode 100644
index 0000000000000..a48f0d0d7dab7
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.NGram + +object NGramExample { + + val conf = new SparkConf().setAppName("NGramExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + + val wordDataFrame = sqlContext.createDataFrame(Seq( + (0, Array("Hi", "I", "heard", "about", "Spark")), + (1, Array("I", "wish", "Java", "could", "use", "case", "classes")), + (2, Array("Logistic", "regression", "models", "are", "neat")) + )).toDF("label", "words") + + val ngram = new NGram().setInputCol("words").setOutputCol("ngrams") + val ngramDataFrame = ngram.transform(wordDataFrame) + ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(println) + + +} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala new file mode 100644 index 0000000000000..be9b9694d6e53 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.Normalizer + +object NormalizerExample { + + val conf = new SparkConf().setAppName("NormalizerExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + val dataFrame = sqlContext.read.format("libsvm") + .load("data/mllib/sample_libsvm_data.txt") + + // Normalize each Vector using $L^1$ norm. + val normalizer = new Normalizer() + .setInputCol("features") + .setOutputCol("normFeatures") + .setP(1.0) + val l1NormData = normalizer.transform(dataFrame) + + // Normalize each Vector using $L^\infty$ norm. 
+ val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity) + +} \ No newline at end of file diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala new file mode 100644 index 0000000000000..673487e661b3d --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer} + +object OneHotEncoderExample { + + val conf = new SparkConf().setAppName("OneHotEncoderExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + val df = sqlContext.createDataFrame(Seq( + (0, "a"), + (1, "b"), + (2, "c"), + (3, "a"), + (4, "a"), + (5, "c") + )).toDF("id", "category") + + val indexer = new StringIndexer() + .setInputCol("category") + .setOutputCol("categoryIndex") + .fit(df) + val indexed = indexer.transform(df) + + val encoder = new OneHotEncoder().setInputCol("categoryIndex"). + setOutputCol("categoryVec") + val encoded = encoder.transform(indexed) + encoded.select("id", "categoryVec").foreach(println) + +} + diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala new file mode 100644 index 0000000000000..bbfed0527e41b --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// scalastyle:off println + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.PCA +import org.apache.spark.mllib.linalg.Vectors + +object PCAExample { + + val conf = new SparkConf().setAppName("PCAExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + val data = Array( + Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))), + Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0), + Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0) + ) + val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") + val pca = new PCA() + .setInputCol("features") + .setOutputCol("pcaFeatures") + .setK(3) + .fit(df) + val pcaDF = pca.transform(df) + val result = pcaDF.select("pcaFeatures") + result.show() +} + diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala new file mode 100644 index 0000000000000..ee37aa6942174 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.PolynomialExpansion +import org.apache.spark.mllib.linalg.Vectors + +object PolynomialExpansionExample { + + val conf = new SparkConf().setAppName("PolynomialExpansionExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + + val data = Array( + Vectors.dense(-2.0, 2.3), + Vectors.dense(0.0, 0.0), + Vectors.dense(0.6, -1.1) + ) + val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") + val polynomialExpansion = new PolynomialExpansion() + .setInputCol("features") + .setOutputCol("polyFeatures") + .setDegree(3) + val polyDF = polynomialExpansion.transform(df) + polyDF.select("polyFeatures").take(3).foreach(println) +} + + diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala new file mode 100644 index 0000000000000..0e07d9575668e --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.RFormula + + +object RFormulaExample { + + val conf = new SparkConf().setAppName("RFormulaExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + + val dataset = sqlContext.createDataFrame(Seq( + (7, "US", 18, 1.0), + (8, "CA", 12, 0.0), + (9, "NZ", 15, 0.0) + )).toDF("id", "country", "hour", "clicked") + val formula = new RFormula() + .setFormula("clicked ~ country + hour") + .setFeaturesCol("features") + .setLabelCol("label") + val output = formula.fit(dataset).transform(dataset) + output.select("features", "label").show() + +} \ No newline at end of file diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala new file mode 100644 index 0000000000000..a75b58b7809e4 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.StandardScaler + +object StandardScalerExample { + + val conf = new SparkConf().setAppName("StandardScalerExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + + val dataFrame = sqlContext.read.format("libsvm") + .load("data/mllib/sample_libsvm_data.txt") + val scaler = new StandardScaler() + .setInputCol("features") + .setOutputCol("scaledFeatures") + .setWithStd(true) + .setWithMean(false) + + // Compute summary statistics by fitting the StandardScaler + val scalerModel = scaler.fit(dataFrame) + + // Normalize each feature to have unit standard deviation. 
+ val scaledData = scalerModel.transform(dataFrame) + +} \ No newline at end of file diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala new file mode 100644 index 0000000000000..f831040ac4386 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.StopWordsRemover + + +object StopWordsRemoverExample { + + val conf = new SparkConf().setAppName("StopWordsRemoverExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + val remover = new StopWordsRemover() + .setInputCol("raw") + .setOutputCol("filtered") + val dataSet = sqlContext.createDataFrame(Seq( + (0, Seq("I", "saw", "the", "red", "baloon")), + (1, Seq("Mary", "had", "a", "little", "lamb")) + )).toDF("id", "raw") + + remover.transform(dataSet).show() + +} + +// scalastyle:on println diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala new file mode 100644 index 0000000000000..1a20c443367b6 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.ml.feature.StringIndexer
+
+object StringIndexerExample {
+
+  val conf = new SparkConf().setAppName("StringIndexerExample")
+  val sc = new SparkContext(conf)
+  val sqlContext = new SQLContext(sc)
+
+  val df = sqlContext.createDataFrame(
+    Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c"))
+  ).toDF("id", "category")
+  val indexer = new StringIndexer()
+    .setInputCol("category")
+    .setOutputCol("categoryIndex")
+  val indexed = indexer.fit(df).transform(df)
+  indexed.show()
+}
+
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
new file mode 100644
index 0000000000000..e0913d4cb422b
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import org.apache.spark.ml.feature.{Tokenizer, RegexTokenizer}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.SQLContext
+
+object TokenizerExample {
+  val conf = new SparkConf().setAppName("TokenizerExample")
+  val sc = new SparkContext(conf)
+  val sqlContext = new SQLContext(sc)
+
+  val sentenceDataFrame = sqlContext.createDataFrame(Seq(
+    (0, "Hi I heard about Spark"),
+    (1, "I wish Java could use case classes"),
+    (2, "Logistic,regression,models,are,neat")
+  )).toDF("label", "sentence")
+
+  val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
+  val regexTokenizer = new RegexTokenizer()
+    .setInputCol("sentence")
+    .setOutputCol("words")
+    .setPattern("\\W") // alternatively .setPattern("\\w+").setGaps(false)
+
+  val tokenized = tokenizer.transform(sentenceDataFrame)
+  tokenized.select("words", "label").take(3).foreach(println)
+  val regexTokenized = regexTokenizer.transform(sentenceDataFrame)
+  regexTokenized.select("words", "label").take(3).foreach(println)
+
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
new file mode 100644
index 0000000000000..70d7d3b805625
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.ml.feature.VectorAssembler
+
+object VectorAssemblerExample {
+
+  val conf = new SparkConf().setAppName("VectorAssemblerExample")
+  val sc = new SparkContext(conf)
+  val sqlContext = new SQLContext(sc)
+
+  val dataset = sqlContext.createDataFrame(
+    Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0))
+  ).toDF("id", "hour", "mobile", "userFeatures", "clicked")
+  val assembler = new VectorAssembler()
+    .setInputCols(Array("hour", "mobile", "userFeatures"))
+    .setOutputCol("features")
+  val output = assembler.transform(dataset)
+  println(output.select("features", "clicked").first())
+
+}
\ No newline at end of file
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
new file mode 100644
index 0000000000000..9367d9e6170db
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.ml.feature.VectorIndexer
+
+object VectorIndexerExample {
+  val conf = new SparkConf().setAppName("VectorIndexerExample")
+  val sc = new SparkContext(conf)
+  val sqlContext = new SQLContext(sc)
+
+  val data = sqlContext.read.format("libsvm")
+    .load("data/mllib/sample_libsvm_data.txt")
+  val indexer = new VectorIndexer()
+    .setInputCol("features")
+    .setOutputCol("indexed")
+    .setMaxCategories(10)
+  val indexerModel = indexer.fit(data)
+  val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet
+  println(s"Chose ${categoricalFeatures.size} categorical features: " +
+    categoricalFeatures.mkString(", "))
+
+  // Create new column "indexed" with categorical values transformed to indices
+  val indexedData = indexerModel.transform(data)
+
+}