diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb
index 6ea1d438f529e..688943213bddc 100644
--- a/docs/_plugins/include_example.rb
+++ b/docs/_plugins/include_example.rb
@@ -93,10 +93,11 @@ def select_lines(code)
.select { |l, i| l.include? "$example off#{@snippet_label}$" }
.map { |l, i| i }
- raise "Start indices amount is not equal to end indices amount, see #{@file}." \
+ raise "Start indices amount is not equal to end indices amount, "\
+ "see #{@file}, [labeled=#{@snippet_label}]." \
unless startIndices.size == endIndices.size
- raise "No code is selected by include_example, see #{@file}." \
+ raise "No code is selected by include_example, see #{@file}, [labeled=#{@snippet_label}]." \
if startIndices.size == 0
# Select and join code blocks together, with a space line between each of two continuous
@@ -104,9 +105,11 @@ def select_lines(code)
lastIndex = -1
result = ""
startIndices.zip(endIndices).each do |start, endline|
- raise "Overlapping between two example code blocks are not allowed, see #{@file}." \
+ raise "Overlapping between two example code blocks are not allowed, "\
+ "see #{@file}, [labeled=#{@snippet_label}]." \
if start <= lastIndex
- raise "$example on$ should not be in the same line with $example off$, see #{@file}." \
+ raise "$example on$ should not be in the same line with $example off$, "\
+ "see #{@file}, [labeled=#{@snippet_label}]." \
if start == endline
lastIndex = endline
range = Range.new(start + 1, endline - 1)
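For reference, `include_example` pairs `$example on:<label>$` / `$example off:<label>$` markers in an example source file and splices the enclosed lines into the docs. A minimal sketch of the convention, using the `local_vector` label from the files below:

    // $example on:local_vector$
    val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
    // $example off:local_vector$

The markdown side then pulls the block in with `{% include_example local_vector scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}`.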
diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md
index 35cee3275e3b5..2bce7eff38322 100644
--- a/docs/mllib-data-types.md
+++ b/docs/mllib-data-types.md
@@ -35,16 +35,7 @@ using the factory methods implemented in
Refer to the [`Vector` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) and [`Vectors` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Vectors$) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.{Vector, Vectors}
-
-// Create a dense vector (1.0, 0.0, 3.0).
-val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
-// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to nonzero entries.
-val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
-// Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries.
-val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0)))
-{% endhighlight %}
+{% include_example local_vector scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
***Note:***
Scala imports `scala.collection.immutable.Vector` by default, so you have to import
@@ -63,15 +54,8 @@ using the factory methods implemented in
Refer to the [`Vector` Java docs](api/java/org/apache/spark/mllib/linalg/Vector.html) and [`Vectors` Java docs](api/java/org/apache/spark/mllib/linalg/Vectors.html) for details on the API.
-{% highlight java %}
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
+{% include_example local_vector java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
-// Create a dense vector (1.0, 0.0, 3.0).
-Vector dv = Vectors.dense(1.0, 0.0, 3.0);
-// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to nonzero entries.
-Vector sv = Vectors.sparse(3, new int[] {0, 2}, new double[] {1.0, 3.0});
-{% endhighlight %}
@@ -92,20 +76,7 @@ in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vectors) to cr
Refer to the [`Vectors` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vectors) for more details on the API.
-{% highlight python %}
-import numpy as np
-import scipy.sparse as sps
-from pyspark.mllib.linalg import Vectors
-
-# Use a NumPy array as a dense vector.
-dv1 = np.array([1.0, 0.0, 3.0])
-# Use a Python list as a dense vector.
-dv2 = [1.0, 0.0, 3.0]
-# Create a SparseVector.
-sv1 = Vectors.sparse(3, [0, 2], [1.0, 3.0])
-# Use a single-column SciPy csc_matrix as a sparse vector.
-sv2 = sps.csc_matrix((np.array([1.0, 3.0]), np.array([0, 2]), np.array([0, 2])), shape=(3, 1))
-{% endhighlight %}
+{% include_example local_vector python/mllib/datatypes_examples.py %}
@@ -127,16 +98,8 @@ A labeled point is represented by the case class
Refer to the [`LabeledPoint` Scala docs](api/scala/index.html#org.apache.spark.mllib.regression.LabeledPoint) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.mllib.regression.LabeledPoint
-
-// Create a labeled point with a positive label and a dense feature vector.
-val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))
+{% include_example labeled_point scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-// Create a labeled point with a negative label and a sparse feature vector.
-val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)))
-{% endhighlight %}
@@ -146,16 +109,8 @@ A labeled point is represented by
Refer to the [`LabeledPoint` Java docs](api/java/org/apache/spark/mllib/regression/LabeledPoint.html) for details on the API.
-{% highlight java %}
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.regression.LabeledPoint;
+{% include_example labeled_point java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
-// Create a labeled point with a positive label and a dense feature vector.
-LabeledPoint pos = new LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0));
-
-// Create a labeled point with a negative label and a sparse feature vector.
-LabeledPoint neg = new LabeledPoint(0.0, Vectors.sparse(3, new int[] {0, 2}, new double[] {1.0, 3.0}));
-{% endhighlight %}
@@ -165,16 +120,8 @@ A labeled point is represented by
Refer to the [`LabeledPoint` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint) for more details on the API.
-{% highlight python %}
-from pyspark.mllib.linalg import SparseVector
-from pyspark.mllib.regression import LabeledPoint
-
-# Create a labeled point with a positive label and a dense feature vector.
-pos = LabeledPoint(1.0, [1.0, 0.0, 3.0])
+{% include_example labeled_point python/mllib/datatypes_examples.py %}
-# Create a labeled point with a negative label and a sparse feature vector.
-neg = LabeledPoint(0.0, SparseVector(3, [0, 2], [1.0, 3.0]))
-{% endhighlight %}
@@ -201,13 +148,8 @@ examples stored in LIBSVM format.
Refer to the [`MLUtils` Scala docs](api/scala/index.html#org.apache.spark.mllib.util.MLUtils$) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.util.MLUtils
-import org.apache.spark.rdd.RDD
+{% include_example libsvm scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-val examples: RDD[LabeledPoint] = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
-{% endhighlight %}
@@ -216,14 +158,8 @@ examples stored in LIBSVM format.
Refer to the [`MLUtils` Java docs](api/java/org/apache/spark/mllib/util/MLUtils.html) for details on the API.
-{% highlight java %}
-import org.apache.spark.mllib.regression.LabeledPoint;
-import org.apache.spark.mllib.util.MLUtils;
-import org.apache.spark.api.java.JavaRDD;
+{% include_example libsvm java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
-JavaRDD examples =
- MLUtils.loadLibSVMFile(jsc.sc(), "data/mllib/sample_libsvm_data.txt").toJavaRDD();
-{% endhighlight %}
@@ -232,11 +168,8 @@ examples stored in LIBSVM format.
Refer to the [`MLUtils` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.util.MLUtils) for more details on the API.
-{% highlight python %}
-from pyspark.mllib.util import MLUtils
+{% include_example libsvm python/mllib/datatypes_examples.py %}
-examples = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
-{% endhighlight %}
@@ -266,15 +199,8 @@ matrices. Remember, local matrices in MLlib are stored in column-major order.
Refer to the [`Matrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Matrix) and [`Matrices` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Matrices$) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.{Matrix, Matrices}
-
-// Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
-val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
+{% include_example local_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-// Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
-val sm: Matrix = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 2, 1), Array(9, 6, 8))
-{% endhighlight %}
@@ -289,16 +215,8 @@ matrices. Remember, local matrices in MLlib are stored in column-major order.
Refer to the [`Matrix` Java docs](api/java/org/apache/spark/mllib/linalg/Matrix.html) and [`Matrices` Java docs](api/java/org/apache/spark/mllib/linalg/Matrices.html) for details on the API.
-{% highlight java %}
-import org.apache.spark.mllib.linalg.Matrix;
-import org.apache.spark.mllib.linalg.Matrices;
+{% include_example local_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
-// Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
-Matrix dm = Matrices.dense(3, 2, new double[] {1.0, 3.0, 5.0, 2.0, 4.0, 6.0});
-
-// Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
-Matrix sm = Matrices.sparse(3, 2, new int[] {0, 1, 3}, new int[] {0, 2, 1}, new double[] {9, 6, 8});
-{% endhighlight %}
@@ -313,15 +231,8 @@ matrices. Remember, local matrices in MLlib are stored in column-major order.
Refer to the [`Matrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Matrix) and [`Matrices` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Matrices) for more details on the API.
-{% highlight python %}
-from pyspark.mllib.linalg import Matrix, Matrices
-
-# Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
-dm2 = Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])
+{% include_example local_matrix python/mllib/datatypes_examples.py %}
-# Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
-sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 2, 1], [9, 6, 8])
-{% endhighlight %}
@@ -369,21 +280,8 @@ For [singular value decomposition (SVD)](https://en.wikipedia.org/wiki/Singular_
Refer to the [`RowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.RowMatrix) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Vector
-import org.apache.spark.mllib.linalg.distributed.RowMatrix
-
-val rows: RDD[Vector] = ... // an RDD of local vectors
-// Create a RowMatrix from an RDD[Vector].
-val mat: RowMatrix = new RowMatrix(rows)
+{% include_example row_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-// Get its size.
-val m = mat.numRows()
-val n = mat.numCols()
-
-// QR decomposition
-val qrResult = mat.tallSkinnyQR(true)
-{% endhighlight %}
@@ -393,22 +291,8 @@ created from a `JavaRDD` instance. Then we can compute its column summa
Refer to the [`RowMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/RowMatrix.html) for details on the API.
-{% highlight java %}
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-
-JavaRDD rows = ... // a JavaRDD of local vectors
-// Create a RowMatrix from an JavaRDD.
-RowMatrix mat = new RowMatrix(rows.rdd());
-
-// Get its size.
-long m = mat.numRows();
-long n = mat.numCols();
+{% include_example row_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
-// QR decomposition
-QRDecomposition result = mat.tallSkinnyQR(true);
-{% endhighlight %}
@@ -418,24 +302,9 @@ created from an `RDD` of vectors.
Refer to the [`RowMatrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.RowMatrix) for more details on the API.
-{% highlight python %}
-from pyspark.mllib.linalg.distributed import RowMatrix
+{% include_example row_matrix python/mllib/datatypes_examples.py %}
-# Create an RDD of vectors.
-rows = sc.parallelize([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
-
-# Create a RowMatrix from an RDD of vectors.
-mat = RowMatrix(rows)
-
-# Get its size.
-m = mat.numRows() # 4
-n = mat.numCols() # 3
-
-# Get the rows as an RDD of vectors again.
-rowsRDD = mat.rows
-{% endhighlight %}
-
### IndexedRowMatrix
@@ -456,20 +325,8 @@ its row indices.
Refer to the [`IndexedRowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix}
-
-val rows: RDD[IndexedRow] = ... // an RDD of indexed rows
-// Create an IndexedRowMatrix from an RDD[IndexedRow].
-val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)
-
-// Get its size.
-val m = mat.numRows()
-val n = mat.numCols()
+{% include_example indexed_row_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-// Drop its row indices.
-val rowMat: RowMatrix = mat.toRowMatrix()
-{% endhighlight %}
@@ -483,23 +340,8 @@ its row indices.
Refer to the [`IndexedRowMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.html) for details on the API.
-{% highlight java %}
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.distributed.IndexedRow;
-import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
+{% include_example indexed_row_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
-JavaRDD rows = ... // a JavaRDD of indexed rows
-// Create an IndexedRowMatrix from a JavaRDD.
-IndexedRowMatrix mat = new IndexedRowMatrix(rows.rdd());
-
-// Get its size.
-long m = mat.numRows();
-long n = mat.numCols();
-
-// Drop its row indices.
-RowMatrix rowMat = mat.toRowMatrix();
-{% endhighlight %}
@@ -512,34 +354,9 @@ its row indices.
Refer to the [`IndexedRowMatrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix) for more details on the API.
-{% highlight python %}
-from pyspark.mllib.linalg.distributed import IndexedRow, IndexedRowMatrix
-
-# Create an RDD of indexed rows.
-# - This can be done explicitly with the IndexedRow class:
-indexedRows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
- IndexedRow(1, [4, 5, 6]),
- IndexedRow(2, [7, 8, 9]),
- IndexedRow(3, [10, 11, 12])])
-# - or by using (long, vector) tuples:
-indexedRows = sc.parallelize([(0, [1, 2, 3]), (1, [4, 5, 6]),
- (2, [7, 8, 9]), (3, [10, 11, 12])])
-
-# Create an IndexedRowMatrix from an RDD of IndexedRows.
-mat = IndexedRowMatrix(indexedRows)
-
-# Get its size.
-m = mat.numRows() # 4
-n = mat.numCols() # 3
+{% include_example indexed_row_matrix python/mllib/datatypes_examples.py %}
-# Get the rows as an RDD of IndexedRows.
-rowsRDD = mat.rows
-
-# Convert to a RowMatrix by dropping the row indices.
-rowMat = mat.toRowMatrix()
-{% endhighlight %}
-
### CoordinateMatrix
@@ -562,20 +379,8 @@ with sparse rows by calling `toIndexedRowMatrix`. Other computations for
Refer to the [`CoordinateMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.CoordinateMatrix) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
-
-val entries: RDD[MatrixEntry] = ... // an RDD of matrix entries
-// Create a CoordinateMatrix from an RDD[MatrixEntry].
-val mat: CoordinateMatrix = new CoordinateMatrix(entries)
+{% include_example coordinate_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-// Get its size.
-val m = mat.numRows()
-val n = mat.numCols()
-
-// Convert it to an IndexRowMatrix whose rows are sparse vectors.
-val indexedRowMatrix = mat.toIndexedRowMatrix()
-{% endhighlight %}
@@ -590,23 +395,8 @@ with sparse rows by calling `toIndexedRowMatrix`. Other computations for
Refer to the [`CoordinateMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.html) for details on the API.
-{% highlight java %}
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
-import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
-import org.apache.spark.mllib.linalg.distributed.MatrixEntry;
-
-JavaRDD entries = ... // a JavaRDD of matrix entries
-// Create a CoordinateMatrix from a JavaRDD.
-CoordinateMatrix mat = new CoordinateMatrix(entries.rdd());
-
-// Get its size.
-long m = mat.numRows();
-long n = mat.numCols();
+{% include_example coordinate_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
-// Convert it to an IndexRowMatrix whose rows are sparse vectors.
-IndexedRowMatrix indexedRowMatrix = mat.toIndexedRowMatrix();
-{% endhighlight %}
@@ -619,36 +409,9 @@ calling `toRowMatrix`, or to an `IndexedRowMatrix` with sparse rows by calling `
Refer to the [`CoordinateMatrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix) for more details on the API.
-{% highlight python %}
-from pyspark.mllib.linalg.distributed import CoordinateMatrix, MatrixEntry
-
-# Create an RDD of coordinate entries.
-# - This can be done explicitly with the MatrixEntry class:
-entries = sc.parallelize([MatrixEntry(0, 0, 1.2), MatrixEntry(1, 0, 2.1), MatrixEntry(6, 1, 3.7)])
-# - or using (long, long, float) tuples:
-entries = sc.parallelize([(0, 0, 1.2), (1, 0, 2.1), (2, 1, 3.7)])
-
-# Create an CoordinateMatrix from an RDD of MatrixEntries.
-mat = CoordinateMatrix(entries)
-
-# Get its size.
-m = mat.numRows() # 3
-n = mat.numCols() # 2
-
-# Get the entries as an RDD of MatrixEntries.
-entriesRDD = mat.entries
-
-# Convert to a RowMatrix.
-rowMat = mat.toRowMatrix()
-
-# Convert to an IndexedRowMatrix.
-indexedRowMat = mat.toIndexedRowMatrix()
+{% include_example coordinate_matrix python/mllib/datatypes_examples.py %}
-# Convert to a BlockMatrix.
-blockMat = mat.toBlockMatrix()
-{% endhighlight %}
-
### BlockMatrix
@@ -670,22 +433,8 @@ Users may change the block size by supplying the values through `toBlockMatrix(r
Refer to the [`BlockMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.BlockMatrix) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry}
-
-val entries: RDD[MatrixEntry] = ... // an RDD of (i, j, v) matrix entries
-// Create a CoordinateMatrix from an RDD[MatrixEntry].
-val coordMat: CoordinateMatrix = new CoordinateMatrix(entries)
-// Transform the CoordinateMatrix to a BlockMatrix
-val matA: BlockMatrix = coordMat.toBlockMatrix().cache()
-
-// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid.
-// Nothing happens if it is valid.
-matA.validate()
+{% include_example block_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-// Calculate A^T A.
-val ata = matA.transpose.multiply(matA)
-{% endhighlight %}
@@ -697,25 +446,8 @@ Users may change the block size by supplying the values through `toBlockMatrix(r
Refer to the [`BlockMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/BlockMatrix.html) for details on the API.
-{% highlight java %}
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.distributed.BlockMatrix;
-import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
-import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
+{% include_example block_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
-JavaRDD entries = ... // a JavaRDD of (i, j, v) Matrix Entries
-// Create a CoordinateMatrix from a JavaRDD.
-CoordinateMatrix coordMat = new CoordinateMatrix(entries.rdd());
-// Transform the CoordinateMatrix to a BlockMatrix
-BlockMatrix matA = coordMat.toBlockMatrix().cache();
-
-// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid.
-// Nothing happens if it is valid.
-matA.validate();
-
-// Calculate A^T A.
-BlockMatrix ata = matA.transpose().multiply(matA);
-{% endhighlight %}
@@ -726,32 +458,7 @@ can be created from an `RDD` of sub-matrix blocks, where a sub-matrix block is a
Refer to the [`BlockMatrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix) for more details on the API.
-{% highlight python %}
-from pyspark.mllib.linalg import Matrices
-from pyspark.mllib.linalg.distributed import BlockMatrix
-
-# Create an RDD of sub-matrix blocks.
-blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
- ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
-
-# Create a BlockMatrix from an RDD of sub-matrix blocks.
-mat = BlockMatrix(blocks, 3, 2)
-
-# Get its size.
-m = mat.numRows() # 6
-n = mat.numCols() # 2
-
-# Get the blocks as an RDD of sub-matrix blocks.
-blocksRDD = mat.blocks
-
-# Convert to a LocalMatrix.
-localMat = mat.toLocalMatrix()
-
-# Convert to an IndexedRowMatrix.
-indexedRowMat = mat.toIndexedRowMatrix()
+{% include_example block_matrix python/mllib/datatypes_examples.py %}
-# Convert to a CoordinateMatrix.
-coordinateMat = mat.toCoordinateMatrix()
-{% endhighlight %}
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java
new file mode 100644
index 0000000000000..24926d4201935
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import java.util.Arrays;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on:local_vector$
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+// $example off:local_vector$
+// $example on:labeled_point$
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.regression.LabeledPoint;
+// $example off:labeled_point$
+// $example on:libsvm$
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+import org.apache.spark.api.java.JavaRDD;
+// $example off:libsvm$
+// $example on:local_matrix$
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.Matrices;
+// $example off:local_matrix$
+// $example on:row_matrix$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.QRDecomposition;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.linalg.distributed.RowMatrix;
+// $example off:row_matrix$
+// $example on:indexed_row_matrix$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.linalg.distributed.IndexedRow;
+import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
+import org.apache.spark.mllib.linalg.distributed.RowMatrix;
+// $example off:indexed_row_matrix$
+// $example on:coordinate_matrix$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
+import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
+import org.apache.spark.mllib.linalg.distributed.MatrixEntry;
+// $example off:coordinate_matrix$
+// $example on:block_matrix$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.distributed.BlockMatrix;
+import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
+import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
+// $example off:block_matrix$
+
+public class JavaDataTypesExamples {
+
+ private static void localVectorExample() {
+ // $example on:local_vector$
+ // Create a dense vector (1.0, 0.0, 3.0).
+ Vector dv = Vectors.dense(1.0, 0.0, 3.0);
+ // Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to
+ // nonzero entries.
+ Vector sv = Vectors.sparse(3, new int[] {0, 2}, new double[] {1.0, 3.0});
+ // $example off:local_vector$
+ }
+
+ private static void labeledPointExample() {
+ // $example on:labeled_point$
+ // Create a labeled point with a positive label and a dense feature vector.
+ LabeledPoint pos = new LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0));
+
+ // Create a labeled point with a negative label and a sparse feature vector.
+ LabeledPoint neg =
+ new LabeledPoint(0.0, Vectors.sparse(3, new int[] {0, 2}, new double[] {1.0, 3.0}));
+ // $example off:labeled_point$
+ }
+
+ private static void libsvmExample() {
+ // $example on:libsvm$
+ SparkContext sc = SparkContext.getOrCreate();
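+ // Each line of the file is "label index1:value1 index2:value2 ..."; indices are
+ // one-based and ascending, and are converted to zero-based on load.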
+ JavaRDD<LabeledPoint> examples =
+ MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toJavaRDD();
+ // $example off:libsvm$
+ }
+
+ private static void localMatrixExample() {
+ // $example on:local_matrix$
+ // Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
+ Matrix dm = Matrices.dense(3, 2, new double[] {1.0, 3.0, 5.0, 2.0, 4.0, 6.0});
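+ // Values are given in column-major order: {1.0, 3.0, 5.0} is the first column.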
+
+ // Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
+ Matrix sm =
+ Matrices.sparse(3, 2, new int[] {0, 1, 3}, new int[] {0, 2, 1}, new double[] {9, 6, 8});
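+ // Sparse local matrices are in compressed sparse column (CSC) format: the three
+ // arrays are column pointers, row indices, and non-zero values.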
+ // $example off:local_matrix$
+ }
+
+ private static void rowMatrixExample() {
+ SparkContext sc = SparkContext.getOrCreate();
+ JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+ // $example on:row_matrix$
+ Vector v1 = Vectors.dense(1.0, 10.0, 100.0);
+ Vector v2 = Vectors.dense(2.0, 20.0, 200.0);
+ Vector v3 = Vectors.dense(3.0, 30.0, 300.0);
+
+ // a JavaRDD of local vectors
+ JavaRDD<Vector> rows = jsc.parallelize(Arrays.asList(v1, v2, v3));
+
+ // Create a RowMatrix from a JavaRDD.
+ RowMatrix mat = new RowMatrix(rows.rdd());
+
+ // Get its size.
+ long m = mat.numRows();
+ long n = mat.numCols();
+
+ // QR decomposition
+ QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true);
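+ // result.Q() is a RowMatrix with orthonormal columns; result.R() is upper triangular.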
+ // $example off:row_matrix$
+ }
+
+ private static void indexedRowMatrixExample() {
+ SparkContext sc = SparkContext.getOrCreate();
+ JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+ // $example on:indexed_row_matrix$
+ IndexedRow r0 = new IndexedRow(0, Vectors.dense(1, 2, 3));
+ IndexedRow r1 = new IndexedRow(1, Vectors.dense(4, 5, 6));
+ IndexedRow r2 = new IndexedRow(2, Vectors.dense(7, 8, 9));
+ IndexedRow r3 = new IndexedRow(3, Vectors.dense(10, 11, 12));
+
+ // a JavaRDD of indexed rows
+ JavaRDD<IndexedRow> rows = jsc.parallelize(Arrays.asList(r0, r1, r2, r3));
+
+ // Create an IndexedRowMatrix from a JavaRDD.
+ IndexedRowMatrix mat = new IndexedRowMatrix(rows.rdd());
+
+ // Get its size.
+ long m = mat.numRows();
+ long n = mat.numCols();
+
+ // Drop its row indices.
+ RowMatrix rowMat = mat.toRowMatrix();
+ // $example off:indexed_row_matrix$
+ }
+
+ private static void coordinateMatrixExample() {
+ SparkContext sc = SparkContext.getOrCreate();
+ JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+ // $example on:coordinate_matrix$
+ MatrixEntry me1 = new MatrixEntry(0, 0, 1.2);
+ MatrixEntry me2 = new MatrixEntry(1, 0, 2.1);
+ MatrixEntry me3 = new MatrixEntry(6, 1, 3.7);
+
+ // a JavaRDD of matrix entries
+ JavaRDD<MatrixEntry> entries = jsc.parallelize(Arrays.asList(me1, me2, me3));
+ // Create a CoordinateMatrix from a JavaRDD.
+ CoordinateMatrix mat = new CoordinateMatrix(entries.rdd());
+
+ // Get its size.
+ long m = mat.numRows();
+ long n = mat.numCols();
+
+ // Convert it to an IndexedRowMatrix whose rows are sparse vectors.
+ IndexedRowMatrix indexedRowMatrix = mat.toIndexedRowMatrix();
+ // $example off:coordinate_matrix$
+ }
+
+ private static void blockMatrixExample() {
+ SparkContext sc = SparkContext.getOrCreate();
+ JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+ // $example on:block_matrix$
+ MatrixEntry me1 = new MatrixEntry(0, 0, 1.2);
+ MatrixEntry me2 = new MatrixEntry(1, 0, 2.1);
+ MatrixEntry me3 = new MatrixEntry(6, 1, 3.7);
+
+ // A JavaRDD of (i, j, v) Matrix Entries
+ JavaRDD<MatrixEntry> entries = jsc.parallelize(Arrays.asList(me1, me2, me3));
+
+ // Create a CoordinateMatrix from a JavaRDD.
+ CoordinateMatrix coordMat = new CoordinateMatrix(entries.rdd());
+ // Transform the CoordinateMatrix to a BlockMatrix
+ BlockMatrix matA = coordMat.toBlockMatrix().cache();
+
+ // Validate whether the BlockMatrix is set up properly.
+ // Throws an Exception when it is not valid. Nothing happens if it is valid.
+ matA.validate();
+
+ // Calculate A^T A.
+ BlockMatrix ata = matA.transpose().multiply(matA);
+ // $example off:block_matrix$
+ }
+
+ public static void main(String[] args) {
+ SparkConf conf = new SparkConf().setAppName("JavaDataTypesExamples");
+ SparkContext sc = new SparkContext(conf);
+
+ localVectorExample();
+ labeledPointExample();
+ libsvmExample();
+ localMatrixExample();
+ rowMatrixExample();
+ indexedRowMatrixExample();
+ coordinateMatrixExample();
+ blockMatrixExample();
+
+ sc.stop();
+ }
+}
diff --git a/examples/src/main/python/mllib/datatypes_examples.py b/examples/src/main/python/mllib/datatypes_examples.py
new file mode 100644
index 0000000000000..bf12c273602ae
--- /dev/null
+++ b/examples/src/main/python/mllib/datatypes_examples.py
@@ -0,0 +1,207 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+
+
+def __local_vector_example():
+ # $example on:local_vector$
+ import numpy as np
+ import scipy.sparse as sps
+ from pyspark.mllib.linalg import Vectors
+
+ # Use a NumPy array as a dense vector.
+ dv1 = np.array([1.0, 0.0, 3.0])
+ # Use a Python list as a dense vector.
+ dv2 = [1.0, 0.0, 3.0]
+ # Create a SparseVector.
+ sv1 = Vectors.sparse(3, [0, 2], [1.0, 3.0])
+ # Use a single-column SciPy csc_matrix as a sparse vector.
+ sv2 = sps.csc_matrix((np.array([1.0, 3.0]), np.array([0, 2]), np.array([0, 2])), shape=(3, 1))
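+ # The csc_matrix tuple is (data, row indices, column pointers), here a 3x1 column.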
+ # $example off:local_vector$
+
+
+def __labeled_point_example():
+ # $example on:labeled_point$
+ from pyspark.mllib.linalg import SparseVector
+ from pyspark.mllib.regression import LabeledPoint
+
+ # Create a labeled point with a positive label and a dense feature vector.
+ pos = LabeledPoint(1.0, [1.0, 0.0, 3.0])
+
+ # Create a labeled point with a negative label and a sparse feature vector.
+ neg = LabeledPoint(0.0, SparseVector(3, [0, 2], [1.0, 3.0]))
+ # $example off:labeled_point$
+
+
+def __libsvm_example():
+ sc = SparkContext.getOrCreate()
+
+ # $example on:libsvm$
+ from pyspark.mllib.util import MLUtils
+
+ examples = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+ # $example off:libsvm$
+
+
+def __local_matrix_example():
+ # $example on:local_matrix$
+ from pyspark.mllib.linalg import Matrix, Matrices
+
+ # Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
+ dm2 = Matrices.dense(3, 2, [1, 3, 5, 2, 4, 6])
+
+ # Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
+ sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 2, 1], [9, 6, 8])
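+ # colPtrs [0, 1, 3]: column 0 holds one non-zero (9 at row 0) and column 1 holds two
+ # (6 at row 2, 8 at row 1), matching the matrix in the comment above.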
+ # $example off:local_matrix$
+
+
+def __row_matrix_example():
+ sc = SparkContext.getOrCreate()
+
+ # $example on:row_matrix$
+ from pyspark.mllib.linalg.distributed import RowMatrix
+
+ # Create an RDD of vectors.
+ rows = sc.parallelize([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], 1)
+
+ # Create a RowMatrix from an RDD of vectors.
+ mat = RowMatrix(rows)
+
+ # Get its size.
+ m = mat.numRows() # 4
+ n = mat.numCols() # 3
+
+ # QR decomposition
+ qrResult = mat.tallSkinnyQR(True)
+ # $example off:row_matrix$
+
+
+def __indexed_row_matrix_example():
+ sc = SparkContext.getOrCreate()
+
+ # $example on:indexed_row_matrix$
+ from pyspark.mllib.linalg.distributed import IndexedRow, IndexedRowMatrix
+
+ # Create an RDD of indexed rows.
+ # - This can be done explicitly with the IndexedRow class:
+ indexedRows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+ IndexedRow(1, [4, 5, 6]),
+ IndexedRow(2, [7, 8, 9]),
+ IndexedRow(3, [10, 11, 12])])
+ # - or by using (long, vector) tuples:
+ indexedRows = sc.parallelize([(0, [1, 2, 3]), (1, [4, 5, 6]),
+ (2, [7, 8, 9]), (3, [10, 11, 12])])
+
+ # Create an IndexedRowMatrix from an RDD of IndexedRows.
+ mat = IndexedRowMatrix(indexedRows)
+
+ # Get its size.
+ m = mat.numRows() # 4
+ n = mat.numCols() # 3
+
+ # Get the rows as an RDD of IndexedRows.
+ rowsRDD = mat.rows
+
+ # Convert to a RowMatrix by dropping the row indices.
+ rowMat = mat.toRowMatrix()
+ # $example off:indexed_row_matrix$
+
+
+def __coordinate_matrix_example():
+ sc = SparkContext.getOrCreate()
+
+ # $example on:coordinate_matrix$
+ from pyspark.mllib.linalg.distributed import CoordinateMatrix, MatrixEntry
+
+ # Create an RDD of coordinate entries.
+ # - This can be done explicitly with the MatrixEntry class:
+ entries = sc.parallelize(
+ [MatrixEntry(0, 0, 1.2), MatrixEntry(1, 0, 2.1), MatrixEntry(2, 1, 3.7)])
+ # - or using (long, long, float) tuples:
+ entries = sc.parallelize([(0, 0, 1.2), (1, 0, 2.1), (2, 1, 3.7)])
+
+ # Create a CoordinateMatrix from an RDD of MatrixEntries.
+ mat = CoordinateMatrix(entries)
+
+ # Get its size.
+ m = mat.numRows() # 3
+ n = mat.numCols() # 2
+
+ # Get the entries as an RDD of MatrixEntries.
+ entriesRDD = mat.entries
+
+ # Convert to a RowMatrix.
+ rowMat = mat.toRowMatrix()
+
+ # Convert to an IndexedRowMatrix.
+ indexedRowMat = mat.toIndexedRowMatrix()
+
+ # Convert to a BlockMatrix.
+ blockMat = mat.toBlockMatrix()
+ # $example off:coordinate_matrix$
+
+
+def __block_matrix_example():
+ sc = SparkContext.getOrCreate()
+
+ # $example on:block_matrix$
+ from pyspark.mllib.linalg import Matrices
+ from pyspark.mllib.linalg.distributed import BlockMatrix
+
+ # Create an RDD of sub-matrix blocks.
+ blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+ ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+
+ # Create a BlockMatrix from an RDD of sub-matrix blocks.
+ mat = BlockMatrix(blocks, 3, 2)
+
+ # Get its size.
+ m = mat.numRows() # 6
+ n = mat.numCols() # 2
+
+ # Get the blocks as an RDD of sub-matrix blocks.
+ blocksRDD = mat.blocks
+
+ # Convert to a LocalMatrix.
+ localMat = mat.toLocalMatrix()
+
+ # Convert to an IndexedRowMatrix.
+ indexedRowMat = mat.toIndexedRowMatrix()
+
+ # Convert to a CoordinateMatrix.
+ coordinateMat = mat.toCoordinateMatrix()
+ # $example off:block_matrix$
+
+
+if __name__ == "__main__":
+ sc = SparkContext(appName="PythonDataTypesExamples")
+
+ __local_vector_example()
+ __labeled_point_example()
+ __libsvm_example()
+ __local_matrix_example()
+ __row_matrix_example()
+ __indexed_row_matrix_example()
+ __coordinate_matrix_example()
+ __block_matrix_example()
+
+ sc.stop()
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DataTypesExamples.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DataTypesExamples.scala
new file mode 100644
index 0000000000000..28c41b8d64988
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DataTypesExamples.scala
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+
+object DataTypesExamples {
+
+ private def localVectorExample(): Unit = {
+ // $example on:local_vector$
+ import org.apache.spark.mllib.linalg.{Vector, Vectors}
+
+ // Create a dense vector (1.0, 0.0, 3.0).
+ val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
+ // Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to
+ // nonzero entries.
+ val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
+ // Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries.
+ val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0)))
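+ // Each element of the Seq is an (index, value) pair.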
+ // $example off:local_vector$
+ }
+
+ private def labeledPointExample(): Unit = {
+ // $example on:labeled_point$
+ import org.apache.spark.mllib.linalg.Vectors
+ import org.apache.spark.mllib.regression.LabeledPoint
+
+ // Create a labeled point with a positive label and a dense feature vector.
+ val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))
+
+ // Create a labeled point with a negative label and a sparse feature vector.
+ val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)))
+ // $example off:labeled_point$
+ }
+
+ private def libsvmExample(): Unit = {
+ val sc = SparkContext.getOrCreate()
+ // $example on:libsvm$
+ import org.apache.spark.mllib.regression.LabeledPoint
+ import org.apache.spark.mllib.util.MLUtils
+ import org.apache.spark.rdd.RDD
+
+ val examples: RDD[LabeledPoint] =
+ MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+ // $example off:libsvm$
+ }
+
+ private def localMatrixExample(): Unit = {
+ // $example on:local_matrix$
+ import org.apache.spark.mllib.linalg.{Matrix, Matrices}
+
+ // Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
+ val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
+
+ // Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
+ val sm: Matrix = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 2, 1), Array(9, 6, 8))
+ // $example off:local_matrix$
+ }
+
+ private def rowMatrixExample(): Unit = {
+ val sc = SparkContext.getOrCreate()
+ // $example on:row_matrix$
+ import org.apache.spark.mllib.linalg.{Vector, Vectors}
+ import org.apache.spark.mllib.linalg.distributed.RowMatrix
+ import org.apache.spark.rdd.RDD
+
+ val v1 = Vectors.dense(1.0, 10.0, 100.0)
+ val v2 = Vectors.dense(2.0, 20.0, 200.0)
+ val v3 = Vectors.dense(3.0, 30.0, 300.0)
+
+ val rows: RDD[Vector] = sc.parallelize(Seq(v1, v2, v3)) // an RDD of local vectors
+ // Create a RowMatrix from an RDD[Vector].
+ val mat: RowMatrix = new RowMatrix(rows)
+
+ // Get its size.
+ val m = mat.numRows()
+ val n = mat.numCols()
+
+ // QR decomposition
+ val qrResult = mat.tallSkinnyQR(true)
+ // $example off:row_matrix$
+ }
+
+ private def indexedRowMatrixExample(): Unit = {
+ val sc = SparkContext.getOrCreate()
+
+ // $example on:indexed_row_matrix$
+ import org.apache.spark.mllib.linalg.Vectors
+ import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix}
+ import org.apache.spark.rdd.RDD
+
+ val r0 = IndexedRow(0, Vectors.dense(1, 2, 3))
+ val r1 = IndexedRow(1, Vectors.dense(4, 5, 6))
+ val r2 = IndexedRow(2, Vectors.dense(7, 8, 9))
+ val r3 = IndexedRow(3, Vectors.dense(10, 11, 12))
+
+ val rows: RDD[IndexedRow] = sc.parallelize(Seq(r0, r1, r2, r3)) // an RDD of indexed rows
+ // Create an IndexedRowMatrix from an RDD[IndexedRow].
+ val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)
+
+ // Get its size.
+ val m = mat.numRows()
+ val n = mat.numCols()
+
+ // Drop its row indices.
+ val rowMat: RowMatrix = mat.toRowMatrix()
+ // $example off:indexed_row_matrix$
+ }
+
+ private def coordinateMatrixExample(): Unit = {
+ val sc = SparkContext.getOrCreate()
+
+ // $example on:coordinate_matrix$
+ import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
+ import org.apache.spark.rdd.RDD
+
+ val me1 = MatrixEntry(0, 0, 1.2)
+ val me2 = MatrixEntry(1, 0, 2.1)
+ val me3 = MatrixEntry(6, 1, 3.7)
+
+ val entries: RDD[MatrixEntry] = sc.parallelize(Seq(me1, me2, me3)) // an RDD of matrix entries
+ // Create a CoordinateMatrix from an RDD[MatrixEntry].
+ val mat: CoordinateMatrix = new CoordinateMatrix(entries)
+
+ // Get its size.
+ val m = mat.numRows()
+ val n = mat.numCols()
+
+ // Convert it to an IndexedRowMatrix whose rows are sparse vectors.
+ val indexedRowMatrix = mat.toIndexedRowMatrix()
+ // $example off:coordinate_matrix$
+ }
+
+ private def blockMatrixExample(): Unit = {
+ val sc = SparkContext.getOrCreate()
+
+ // $example on:block_matrix$
+ import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry}
+ import org.apache.spark.rdd.RDD
+
+ val me1 = MatrixEntry(0, 0, 1.2)
+ val me2 = MatrixEntry(1, 0, 2.1)
+ val me3 = MatrixEntry(6, 1, 3.7)
+
+ // an RDD of (i, j, v) matrix entries
+ val entries: RDD[MatrixEntry] = sc.parallelize(Seq(me1, me2, me3))
+ // Create a CoordinateMatrix from an RDD[MatrixEntry].
+ val coordMat: CoordinateMatrix = new CoordinateMatrix(entries)
+ // Transform the CoordinateMatrix to a BlockMatrix
+ val matA: BlockMatrix = coordMat.toBlockMatrix().cache()
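+ // toBlockMatrix() uses 1024 x 1024 blocks by default; pass rowsPerBlock and
+ // colsPerBlock to toBlockMatrix(rowsPerBlock, colsPerBlock) to change this.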
+
+ // Validate whether the BlockMatrix is set up properly.
+ // Throws an Exception when it is not valid.
+ // Nothing happens if it is valid.
+ matA.validate()
+
+ // Calculate A^T A.
+ val ata = matA.transpose.multiply(matA)
+ // $example off:block_matrix$
+ }
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf().setAppName("DataTypesExamples")
+ val sc = new SparkContext(conf)
+
+ localVectorExample()
+ labeledPointExample()
+ libsvmExample()
+ localMatrixExample()
+ rowMatrixExample()
+ indexedRowMatrixExample()
+ coordinateMatrixExample()
+ blockMatrixExample()
+
+ sc.stop()
+ }
+}
+// scalastyle:on println