diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb index 6ea1d438f529e..688943213bddc 100644 --- a/docs/_plugins/include_example.rb +++ b/docs/_plugins/include_example.rb @@ -93,10 +93,11 @@ def select_lines(code) .select { |l, i| l.include? "$example off#{@snippet_label}$" } .map { |l, i| i } - raise "Start indices amount is not equal to end indices amount, see #{@file}." \ + raise "Start indices amount is not equal to end indices amount, "\ + "see #{@file}, [labeled=#{@snippet_label}]." \ unless startIndices.size == endIndices.size - raise "No code is selected by include_example, see #{@file}." \ + raise "No code is selected by include_example, see #{@file}, [labeled=#{@snippet_label}]." \ if startIndices.size == 0 # Select and join code blocks together, with a space line between each of two continuous @@ -104,9 +105,11 @@ def select_lines(code) lastIndex = -1 result = "" startIndices.zip(endIndices).each do |start, endline| - raise "Overlapping between two example code blocks are not allowed, see #{@file}." \ + raise "Overlapping between two example code blocks are not allowed, "\ + "see #{@file}, [labeled=#{@snippet_label}]." \ if start <= lastIndex - raise "$example on$ should not be in the same line with $example off$, see #{@file}." \ + raise "$example on$ should not be in the same line with $example off$, "\ + "see #{@file}, [labeled=#{@snippet_label}]." \ if start == endline lastIndex = endline range = Range.new(start + 1, endline - 1) diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index 35cee3275e3b5..2bce7eff38322 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -35,16 +35,7 @@ using the factory methods implemented in Refer to the [`Vector` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) and [`Vectors` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Vectors$) for details on the API. -{% highlight scala %} -import org.apache.spark.mllib.linalg.{Vector, Vectors} - -// Create a dense vector (1.0, 0.0, 3.0). -val dv: Vector = Vectors.dense(1.0, 0.0, 3.0) -// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to nonzero entries. -val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)) -// Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries. -val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0))) -{% endhighlight %} +{% include_example local_vector scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %} ***Note:*** Scala imports `scala.collection.immutable.Vector` by default, so you have to import @@ -63,15 +54,8 @@ using the factory methods implemented in Refer to the [`Vector` Java docs](api/java/org/apache/spark/mllib/linalg/Vector.html) and [`Vectors` Java docs](api/java/org/apache/spark/mllib/linalg/Vectors.html) for details on the API. -{% highlight java %} -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.mllib.linalg.Vectors; +{% include_example local_vector java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %} -// Create a dense vector (1.0, 0.0, 3.0). -Vector dv = Vectors.dense(1.0, 0.0, 3.0); -// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to nonzero entries. -Vector sv = Vectors.sparse(3, new int[] {0, 2}, new double[] {1.0, 3.0}); -{% endhighlight %}
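For context on the mechanism these hunks rely on: an example source file brackets each snippet with `$example on:<label>$` and `$example off:<label>$` marker comments, and the Markdown page then pulls exactly that region with the `include_example` Liquid tag. A minimal sketch (the `my_label` name and file are illustrative, not part of this patch):

{% highlight scala %}
// $example on:my_label$
import org.apache.spark.mllib.linalg.Vectors

val v = Vectors.dense(1.0, 0.0, 3.0)
// $example off:my_label$
{% endhighlight %}

which a page would include as `{% include_example my_label scala/org/apache/spark/examples/mllib/SomeExample.scala %}`. The plugin change above simply adds the snippet label to the error messages, so mismatched, empty, or overlapping marker pairs are easier to trace back to the offending label.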
@@ -92,20 +76,7 @@ in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vectors) to cr Refer to the [`Vectors` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vectors) for more details on the API. -{% highlight python %} -import numpy as np -import scipy.sparse as sps -from pyspark.mllib.linalg import Vectors - -# Use a NumPy array as a dense vector. -dv1 = np.array([1.0, 0.0, 3.0]) -# Use a Python list as a dense vector. -dv2 = [1.0, 0.0, 3.0] -# Create a SparseVector. -sv1 = Vectors.sparse(3, [0, 2], [1.0, 3.0]) -# Use a single-column SciPy csc_matrix as a sparse vector. -sv2 = sps.csc_matrix((np.array([1.0, 3.0]), np.array([0, 2]), np.array([0, 2])), shape=(3, 1)) -{% endhighlight %} +{% include_example local_vector python/mllib/datatypes_examples.py %}
@@ -127,16 +98,8 @@ A labeled point is represented by the case class Refer to the [`LabeledPoint` Scala docs](api/scala/index.html#org.apache.spark.mllib.regression.LabeledPoint) for details on the API. -{% highlight scala %} -import org.apache.spark.mllib.linalg.Vectors -import org.apache.spark.mllib.regression.LabeledPoint - -// Create a labeled point with a positive label and a dense feature vector. -val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)) +{% include_example labeled_point scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %} -// Create a labeled point with a negative label and a sparse feature vector. -val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))) -{% endhighlight %}
@@ -146,16 +109,8 @@ A labeled point is represented by Refer to the [`LabeledPoint` Java docs](api/java/org/apache/spark/mllib/regression/LabeledPoint.html) for details on the API. -{% highlight java %} -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.mllib.regression.LabeledPoint; +{% include_example labeled_point java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %} -// Create a labeled point with a positive label and a dense feature vector. -LabeledPoint pos = new LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)); - -// Create a labeled point with a negative label and a sparse feature vector. -LabeledPoint neg = new LabeledPoint(0.0, Vectors.sparse(3, new int[] {0, 2}, new double[] {1.0, 3.0})); -{% endhighlight %}
@@ -165,16 +120,8 @@ A labeled point is represented by Refer to the [`LabeledPoint` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint) for more details on the API. -{% highlight python %} -from pyspark.mllib.linalg import SparseVector -from pyspark.mllib.regression import LabeledPoint - -# Create a labeled point with a positive label and a dense feature vector. -pos = LabeledPoint(1.0, [1.0, 0.0, 3.0]) +{% include_example labeled_point python/mllib/datatypes_examples.py %} -# Create a labeled point with a negative label and a sparse feature vector. -neg = LabeledPoint(0.0, SparseVector(3, [0, 2], [1.0, 3.0])) -{% endhighlight %}
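Since a labeled point's label is stored as a `Double`, the same type serves regression and classification: for binary classification the label must be 0.0 or 1.0, while multiclass labels are zero-based class indices. A small Scala sketch (the values are illustrative):

{% highlight scala %}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

// Binary classification: the label is 0.0 or 1.0.
val binary = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))
// Multiclass classification: labels are class indices 0.0, 1.0, 2.0, ...
val thirdClass = LabeledPoint(2.0, Vectors.dense(1.0, 0.0, 3.0))
// Regression: the label is the real-valued target.
val target = LabeledPoint(2.5, Vectors.dense(1.0, 0.0, 3.0))
{% endhighlight %}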
@@ -201,13 +148,8 @@ examples stored in LIBSVM format. Refer to the [`MLUtils` Scala docs](api/scala/index.html#org.apache.spark.mllib.util.MLUtils$) for details on the API. -{% highlight scala %} -import org.apache.spark.mllib.regression.LabeledPoint -import org.apache.spark.mllib.util.MLUtils -import org.apache.spark.rdd.RDD +{% include_example libsvm scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %} -val examples: RDD[LabeledPoint] = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") -{% endhighlight %}
@@ -216,14 +158,8 @@ examples stored in LIBSVM format.
 
 Refer to the [`MLUtils` Java docs](api/java/org/apache/spark/mllib/util/MLUtils.html) for details on the API.
 
-{% highlight java %}
-import org.apache.spark.mllib.regression.LabeledPoint;
-import org.apache.spark.mllib.util.MLUtils;
-import org.apache.spark.api.java.JavaRDD;
+{% include_example libsvm java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
 
-JavaRDD<LabeledPoint> examples =
-  MLUtils.loadLibSVMFile(jsc.sc(), "data/mllib/sample_libsvm_data.txt").toJavaRDD();
-{% endhighlight %}

@@ -232,11 +168,8 @@ examples stored in LIBSVM format. Refer to the [`MLUtils` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.util.MLUtils) for more details on the API. -{% highlight python %} -from pyspark.mllib.util import MLUtils +{% include_example libsvm python/mllib/datatypes_examples.py %} -examples = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") -{% endhighlight %}
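For reference, the format these loaders parse is the LIBSVM text format, in which each line encodes one sparse labeled point as `label index1:value1 index2:value2 ...`; feature indices are one-based and ascending, and are converted to zero-based indices after loading. For example, the line `1.0 1:0.5 3:2.5` yields a labeled point with label 1.0 whose (zero-based) features 0 and 2 are 0.5 and 2.5.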
@@ -266,15 +199,8 @@ matrices. Remember, local matrices in MLlib are stored in column-major order. Refer to the [`Matrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Matrix) and [`Matrices` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Matrices$) for details on the API. -{% highlight scala %} -import org.apache.spark.mllib.linalg.{Matrix, Matrices} - -// Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0)) -val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0)) +{% include_example local_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %} -// Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0)) -val sm: Matrix = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 2, 1), Array(9, 6, 8)) -{% endhighlight %}
@@ -289,16 +215,8 @@ matrices. Remember, local matrices in MLlib are stored in column-major order. Refer to the [`Matrix` Java docs](api/java/org/apache/spark/mllib/linalg/Matrix.html) and [`Matrices` Java docs](api/java/org/apache/spark/mllib/linalg/Matrices.html) for details on the API. -{% highlight java %} -import org.apache.spark.mllib.linalg.Matrix; -import org.apache.spark.mllib.linalg.Matrices; +{% include_example local_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %} -// Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0)) -Matrix dm = Matrices.dense(3, 2, new double[] {1.0, 3.0, 5.0, 2.0, 4.0, 6.0}); - -// Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0)) -Matrix sm = Matrices.sparse(3, 2, new int[] {0, 1, 3}, new int[] {0, 2, 1}, new double[] {9, 6, 8}); -{% endhighlight %}
@@ -313,15 +231,8 @@ matrices. Remember, local matrices in MLlib are stored in column-major order. Refer to the [`Matrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Matrix) and [`Matrices` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Matrices) for more details on the API. -{% highlight python %} -from pyspark.mllib.linalg import Matrix, Matrices - -# Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0)) -dm2 = Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6]) +{% include_example local_matrix python/mllib/datatypes_examples.py %} -# Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0)) -sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 2, 1], [9, 6, 8]) -{% endhighlight %}
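Because the column-major and CSC conventions used by these three snippets are easy to misread, here is the same pair of constructors annotated with how each argument maps onto matrix entries (a sketch reusing the values from the examples above):

{% highlight scala %}
import org.apache.spark.mllib.linalg.Matrices

// Column-major: the first numRows values fill column 0, the next fill column 1.
// Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0) with 3 rows and 2 columns therefore yields
// ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0)).
val dm = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))

// CSC layout: colPtrs(j) to colPtrs(j + 1) delimit the entries of column j.
// colPtrs = Array(0, 1, 3): column 0 owns entry 0; column 1 owns entries 1 and 2.
// rowIndices = Array(0, 2, 1) and values = Array(9, 6, 8) then place
// 9 at (0, 0), 6 at (2, 1), and 8 at (1, 1), i.e. ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0)).
val sm = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 2, 1), Array(9, 6, 8))
{% endhighlight %}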
@@ -369,21 +280,8 @@ For [singular value decomposition (SVD)](https://en.wikipedia.org/wiki/Singular_ Refer to the [`RowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.RowMatrix) for details on the API. -{% highlight scala %} -import org.apache.spark.mllib.linalg.Vector -import org.apache.spark.mllib.linalg.distributed.RowMatrix - -val rows: RDD[Vector] = ... // an RDD of local vectors -// Create a RowMatrix from an RDD[Vector]. -val mat: RowMatrix = new RowMatrix(rows) +{% include_example row_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %} -// Get its size. -val m = mat.numRows() -val n = mat.numCols() - -// QR decomposition -val qrResult = mat.tallSkinnyQR(true) -{% endhighlight %}
@@ -393,22 +291,8 @@ created from a `JavaRDD<Vector>` instance. Then we can compute its column summa
 
 Refer to the [`RowMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/RowMatrix.html) for details on the API.
 
-{% highlight java %}
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-
-JavaRDD<Vector> rows = ... // a JavaRDD of local vectors
-// Create a RowMatrix from a JavaRDD<Vector>.
-RowMatrix mat = new RowMatrix(rows.rdd());
-
-// Get its size.
-long m = mat.numRows();
-long n = mat.numCols();
+{% include_example row_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
 
-// QR decomposition
-QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true);
-{% endhighlight %}
@@ -418,24 +302,9 @@ created from an `RDD` of vectors. Refer to the [`RowMatrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.RowMatrix) for more details on the API. -{% highlight python %} -from pyspark.mllib.linalg.distributed import RowMatrix +{% include_example row_matrix python/mllib/datatypes_examples.py %} -# Create an RDD of vectors. -rows = sc.parallelize([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - -# Create a RowMatrix from an RDD of vectors. -mat = RowMatrix(rows) - -# Get its size. -m = mat.numRows() # 4 -n = mat.numCols() # 3 - -# Get the rows as an RDD of vectors again. -rowsRDD = mat.rows -{% endhighlight %}
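The hunk context above mentions computing column summary statistics on a `RowMatrix`, which the snippets no longer show; a brief sketch of that call, reusing the `mat` built in the Scala example:

{% highlight scala %}
import org.apache.spark.mllib.stat.MultivariateStatisticalSummary

// Compute per-column summary statistics of the RowMatrix `mat`.
val summary: MultivariateStatisticalSummary = mat.computeColumnSummaryStatistics()
println(summary.mean)  // a dense vector of column means
println(summary.variance)  // column variances
println(summary.numNonzeros)  // number of nonzeros in each column
{% endhighlight %}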
- ### IndexedRowMatrix @@ -456,20 +325,8 @@ its row indices. Refer to the [`IndexedRowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix) for details on the API. -{% highlight scala %} -import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix} - -val rows: RDD[IndexedRow] = ... // an RDD of indexed rows -// Create an IndexedRowMatrix from an RDD[IndexedRow]. -val mat: IndexedRowMatrix = new IndexedRowMatrix(rows) - -// Get its size. -val m = mat.numRows() -val n = mat.numCols() +{% include_example indexed_row_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %} -// Drop its row indices. -val rowMat: RowMatrix = mat.toRowMatrix() -{% endhighlight %}
@@ -483,23 +340,8 @@ its row indices.
 
 Refer to the [`IndexedRowMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.html) for details on the API.
 
-{% highlight java %}
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.distributed.IndexedRow;
-import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
+{% include_example indexed_row_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
 
-JavaRDD<IndexedRow> rows = ... // a JavaRDD of indexed rows
-// Create an IndexedRowMatrix from a JavaRDD<IndexedRow>.
-IndexedRowMatrix mat = new IndexedRowMatrix(rows.rdd());
-
-// Get its size.
-long m = mat.numRows();
-long n = mat.numCols();
-
-// Drop its row indices.
-RowMatrix rowMat = mat.toRowMatrix();
-{% endhighlight %}
@@ -512,34 +354,9 @@ its row indices. Refer to the [`IndexedRowMatrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix) for more details on the API. -{% highlight python %} -from pyspark.mllib.linalg.distributed import IndexedRow, IndexedRowMatrix - -# Create an RDD of indexed rows. -# - This can be done explicitly with the IndexedRow class: -indexedRows = sc.parallelize([IndexedRow(0, [1, 2, 3]), - IndexedRow(1, [4, 5, 6]), - IndexedRow(2, [7, 8, 9]), - IndexedRow(3, [10, 11, 12])]) -# - or by using (long, vector) tuples: -indexedRows = sc.parallelize([(0, [1, 2, 3]), (1, [4, 5, 6]), - (2, [7, 8, 9]), (3, [10, 11, 12])]) - -# Create an IndexedRowMatrix from an RDD of IndexedRows. -mat = IndexedRowMatrix(indexedRows) - -# Get its size. -m = mat.numRows() # 4 -n = mat.numCols() # 3 +{% include_example indexed_row_matrix python/mllib/datatypes_examples.py %} -# Get the rows as an RDD of IndexedRows. -rowsRDD = mat.rows - -# Convert to a RowMatrix by dropping the row indices. -rowMat = mat.toRowMatrix() -{% endhighlight %}
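Besides dropping indices with `toRowMatrix()`, an `IndexedRowMatrix` also converts to the other distributed matrix types; a short sketch continuing from the Scala `mat` above:

{% highlight scala %}
// Convert to a CoordinateMatrix of (i, j, value) entries.
val coordMat = mat.toCoordinateMatrix()

// Convert to a BlockMatrix for block-partitioned operations.
val blockMat = mat.toBlockMatrix()
{% endhighlight %}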
- ### CoordinateMatrix @@ -562,20 +379,8 @@ with sparse rows by calling `toIndexedRowMatrix`. Other computations for Refer to the [`CoordinateMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.CoordinateMatrix) for details on the API. -{% highlight scala %} -import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry} - -val entries: RDD[MatrixEntry] = ... // an RDD of matrix entries -// Create a CoordinateMatrix from an RDD[MatrixEntry]. -val mat: CoordinateMatrix = new CoordinateMatrix(entries) +{% include_example coordinate_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %} -// Get its size. -val m = mat.numRows() -val n = mat.numCols() - -// Convert it to an IndexRowMatrix whose rows are sparse vectors. -val indexedRowMatrix = mat.toIndexedRowMatrix() -{% endhighlight %}
@@ -590,23 +395,8 @@ with sparse rows by calling `toIndexedRowMatrix`. Other computations for
 
 Refer to the [`CoordinateMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.html) for details on the API.
 
-{% highlight java %}
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
-import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
-import org.apache.spark.mllib.linalg.distributed.MatrixEntry;
-
-JavaRDD<MatrixEntry> entries = ... // a JavaRDD of matrix entries
-// Create a CoordinateMatrix from a JavaRDD<MatrixEntry>.
-CoordinateMatrix mat = new CoordinateMatrix(entries.rdd());
-
-// Get its size.
-long m = mat.numRows();
-long n = mat.numCols();
+{% include_example coordinate_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
 
-// Convert it to an IndexRowMatrix whose rows are sparse vectors.
-IndexedRowMatrix indexedRowMatrix = mat.toIndexedRowMatrix();
-{% endhighlight %}
@@ -619,36 +409,9 @@ calling `toRowMatrix`, or to an `IndexedRowMatrix` with sparse rows by calling ` Refer to the [`CoordinateMatrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix) for more details on the API. -{% highlight python %} -from pyspark.mllib.linalg.distributed import CoordinateMatrix, MatrixEntry - -# Create an RDD of coordinate entries. -# - This can be done explicitly with the MatrixEntry class: -entries = sc.parallelize([MatrixEntry(0, 0, 1.2), MatrixEntry(1, 0, 2.1), MatrixEntry(6, 1, 3.7)]) -# - or using (long, long, float) tuples: -entries = sc.parallelize([(0, 0, 1.2), (1, 0, 2.1), (2, 1, 3.7)]) - -# Create an CoordinateMatrix from an RDD of MatrixEntries. -mat = CoordinateMatrix(entries) - -# Get its size. -m = mat.numRows() # 3 -n = mat.numCols() # 2 - -# Get the entries as an RDD of MatrixEntries. -entriesRDD = mat.entries - -# Convert to a RowMatrix. -rowMat = mat.toRowMatrix() - -# Convert to an IndexedRowMatrix. -indexedRowMat = mat.toIndexedRowMatrix() +{% include_example coordinate_matrix python/mllib/datatypes_examples.py %} -# Convert to a BlockMatrix. -blockMat = mat.toBlockMatrix() -{% endhighlight %}
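One operation the snippets omit: because a `CoordinateMatrix` is just an RDD of `(i, j, value)` entries, transposition is cheap, swapping each entry's row and column index; continuing from the Scala `mat` above:

{% highlight scala %}
// Transpose by swapping every entry's row and column indices.
val matTransposed = mat.transpose()
{% endhighlight %}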
- ### BlockMatrix @@ -670,22 +433,8 @@ Users may change the block size by supplying the values through `toBlockMatrix(r Refer to the [`BlockMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.BlockMatrix) for details on the API. -{% highlight scala %} -import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry} - -val entries: RDD[MatrixEntry] = ... // an RDD of (i, j, v) matrix entries -// Create a CoordinateMatrix from an RDD[MatrixEntry]. -val coordMat: CoordinateMatrix = new CoordinateMatrix(entries) -// Transform the CoordinateMatrix to a BlockMatrix -val matA: BlockMatrix = coordMat.toBlockMatrix().cache() - -// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid. -// Nothing happens if it is valid. -matA.validate() +{% include_example block_matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %} -// Calculate A^T A. -val ata = matA.transpose.multiply(matA) -{% endhighlight %}
@@ -697,25 +446,8 @@ Users may change the block size by supplying the values through `toBlockMatrix(r
 
 Refer to the [`BlockMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/BlockMatrix.html) for details on the API.
 
-{% highlight java %}
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.distributed.BlockMatrix;
-import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
-import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
+{% include_example block_matrix java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java %}
 
-JavaRDD<MatrixEntry> entries = ... // a JavaRDD of (i, j, v) Matrix Entries
-// Create a CoordinateMatrix from a JavaRDD<MatrixEntry>.
-CoordinateMatrix coordMat = new CoordinateMatrix(entries.rdd());
-// Transform the CoordinateMatrix to a BlockMatrix
-BlockMatrix matA = coordMat.toBlockMatrix().cache();
-
-// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid.
-// Nothing happens if it is valid.
-matA.validate();
-
-// Calculate A^T A.
-BlockMatrix ata = matA.transpose().multiply(matA);
-{% endhighlight %}
@@ -726,32 +458,7 @@ can be created from an `RDD` of sub-matrix blocks, where a sub-matrix block is a Refer to the [`BlockMatrix` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix) for more details on the API. -{% highlight python %} -from pyspark.mllib.linalg import Matrices -from pyspark.mllib.linalg.distributed import BlockMatrix - -# Create an RDD of sub-matrix blocks. -blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), - ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) - -# Create a BlockMatrix from an RDD of sub-matrix blocks. -mat = BlockMatrix(blocks, 3, 2) - -# Get its size. -m = mat.numRows() # 6 -n = mat.numCols() # 2 - -# Get the blocks as an RDD of sub-matrix blocks. -blocksRDD = mat.blocks - -# Convert to a LocalMatrix. -localMat = mat.toLocalMatrix() - -# Convert to an IndexedRowMatrix. -indexedRowMat = mat.toIndexedRowMatrix() +{% include_example block_matrix python/mllib/datatypes_examples.py %} -# Convert to a CoordinateMatrix. -coordinateMat = mat.toCoordinateMatrix() -{% endhighlight %}
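The prose above notes that `toBlockMatrix()` defaults to 1024 x 1024 blocks and that `toBlockMatrix(rowsPerBlock, colsPerBlock)` overrides this; a short sketch continuing from the `coordMat` in the Scala example:

{% highlight scala %}
// Use 512 x 512 blocks instead of the 1024 x 1024 default.
val matB = coordMat.toBlockMatrix(512, 512).cache()

// add and multiply require the operands' block sizes to match,
// so pick one block size and cache the blocked form before repeated use.
val sum = matB.add(matB)
{% endhighlight %}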
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java
new file mode 100644
index 0000000000000..24926d4201935
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaDataTypesExamples.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import java.util.Arrays;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on:local_vector$
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+// $example off:local_vector$
+// $example on:labeled_point$
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.regression.LabeledPoint;
+// $example off:labeled_point$
+// $example on:libsvm$
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+import org.apache.spark.api.java.JavaRDD;
+// $example off:libsvm$
+// $example on:local_matrix$
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.Matrices;
+// $example off:local_matrix$
+// $example on:row_matrix$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.distributed.RowMatrix;
+import org.apache.spark.mllib.linalg.QRDecomposition;
+// $example off:row_matrix$
+// $example on:indexed_row_matrix$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.distributed.IndexedRow;
+import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
+import org.apache.spark.mllib.linalg.distributed.RowMatrix;
+// $example off:indexed_row_matrix$
+// $example on:coordinate_matrix$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
+import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
+import org.apache.spark.mllib.linalg.distributed.MatrixEntry;
+// $example off:coordinate_matrix$
+// $example on:block_matrix$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.distributed.BlockMatrix;
+import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
+import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
+// $example off:block_matrix$
+
+
+public class JavaDataTypesExamples {
+
+  private static void localVectorExample() {
+    // $example on:local_vector$
+    // Create a dense vector (1.0, 0.0, 3.0).
+    Vector dv = Vectors.dense(1.0, 0.0, 3.0);
+    // Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to
+    // nonzero entries.
+    Vector sv = Vectors.sparse(3, new int[] {0, 2}, new double[] {1.0, 3.0});
+    // $example off:local_vector$
+  }
+
+  private static void labeledPointExample() {
+    // $example on:labeled_point$
+    // Create a labeled point with a positive label and a dense feature vector.
+    LabeledPoint pos = new LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0));
+
+    // Create a labeled point with a negative label and a sparse feature vector.
+    LabeledPoint neg =
+      new LabeledPoint(0.0, Vectors.sparse(3, new int[] {0, 2}, new double[] {1.0, 3.0}));
+    // $example off:labeled_point$
+  }
+
+  private static void libsvmExample() {
+    // $example on:libsvm$
+    SparkContext sc = SparkContext.getOrCreate();
+    JavaRDD<LabeledPoint> examples =
+      MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toJavaRDD();
+    // $example off:libsvm$
+  }
+
+  private static void localMatrixExample() {
+    // $example on:local_matrix$
+    // Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
+    Matrix dm = Matrices.dense(3, 2, new double[] {1.0, 3.0, 5.0, 2.0, 4.0, 6.0});
+
+    // Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
+    Matrix sm =
+      Matrices.sparse(3, 2, new int[] {0, 1, 3}, new int[] {0, 2, 1}, new double[] {9, 6, 8});
+    // $example off:local_matrix$
+  }
+
+  private static void rowMatrixExample() {
+    SparkContext sc = SparkContext.getOrCreate();
+    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+    // $example on:row_matrix$
+    Vector v1 = Vectors.dense(1.0, 10.0, 100.0);
+    Vector v2 = Vectors.dense(2.0, 20.0, 200.0);
+    Vector v3 = Vectors.dense(3.0, 30.0, 300.0);
+
+    // a JavaRDD of local vectors
+    JavaRDD<Vector> rows = jsc.parallelize(Arrays.asList(v1, v2, v3));
+
+    // Create a RowMatrix from a JavaRDD<Vector>.
+    RowMatrix mat = new RowMatrix(rows.rdd());
+
+    // Get its size.
+    long m = mat.numRows();
+    long n = mat.numCols();
+
+    // QR decomposition
+    QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true);
+    // $example off:row_matrix$
+  }
+
+  private static void indexedRowMatrixExample() {
+    SparkContext sc = SparkContext.getOrCreate();
+    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+    // $example on:indexed_row_matrix$
+    IndexedRow r0 = new IndexedRow(0, Vectors.dense(1, 2, 3));
+    IndexedRow r1 = new IndexedRow(1, Vectors.dense(4, 5, 6));
+    IndexedRow r2 = new IndexedRow(2, Vectors.dense(7, 8, 9));
+    IndexedRow r3 = new IndexedRow(3, Vectors.dense(10, 11, 12));
+
+    // a JavaRDD of indexed rows
+    JavaRDD<IndexedRow> rows = jsc.parallelize(Arrays.asList(r0, r1, r2, r3));
+
+    // Create an IndexedRowMatrix from a JavaRDD<IndexedRow>.
+    IndexedRowMatrix mat = new IndexedRowMatrix(rows.rdd());
+
+    // Get its size.
+    long m = mat.numRows();
+    long n = mat.numCols();
+
+    // Drop its row indices.
+    RowMatrix rowMat = mat.toRowMatrix();
+    // $example off:indexed_row_matrix$
+  }
+
+  private static void coordinateMatrixExample() {
+    SparkContext sc = SparkContext.getOrCreate();
+    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+    // $example on:coordinate_matrix$
+    MatrixEntry me1 = new MatrixEntry(0, 0, 1.2);
+    MatrixEntry me2 = new MatrixEntry(1, 0, 2.1);
+    MatrixEntry me3 = new MatrixEntry(6, 1, 3.7);
+
+    // a JavaRDD of matrix entries
+    JavaRDD<MatrixEntry> entries = jsc.parallelize(Arrays.asList(me1, me2, me3));
+    // Create a CoordinateMatrix from a JavaRDD<MatrixEntry>.
+    CoordinateMatrix mat = new CoordinateMatrix(entries.rdd());
+
+    // Get its size.
+    long m = mat.numRows();
+    long n = mat.numCols();
+
+    // Convert it to an IndexedRowMatrix whose rows are sparse vectors.
+    IndexedRowMatrix indexedRowMatrix = mat.toIndexedRowMatrix();
+    // $example off:coordinate_matrix$
+  }
+
+  private static void blockMatrixExample() {
+    SparkContext sc = SparkContext.getOrCreate();
+    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+    // $example on:block_matrix$
+    MatrixEntry me1 = new MatrixEntry(0, 0, 1.2);
+    MatrixEntry me2 = new MatrixEntry(1, 0, 2.1);
+    MatrixEntry me3 = new MatrixEntry(6, 1, 3.7);
+
+    // a JavaRDD of (i, j, v) matrix entries
+    JavaRDD<MatrixEntry> entries = jsc.parallelize(Arrays.asList(me1, me2, me3));
+
+    // Create a CoordinateMatrix from a JavaRDD<MatrixEntry>.
+    CoordinateMatrix coordMat = new CoordinateMatrix(entries.rdd());
+    // Transform the CoordinateMatrix to a BlockMatrix
+    BlockMatrix matA = coordMat.toBlockMatrix().cache();
+
+    // Validate whether the BlockMatrix is set up properly.
+    // Throws an Exception when it is not valid. Nothing happens if it is valid.
+    matA.validate();
+
+    // Calculate A^T A.
+    BlockMatrix ata = matA.transpose().multiply(matA);
+    // $example off:block_matrix$
+  }
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaDataTypesExamples");
+    SparkContext sc = new SparkContext(conf);
+
+    localVectorExample();
+    labeledPointExample();
+    libsvmExample();
+    localMatrixExample();
+    rowMatrixExample();
+    indexedRowMatrixExample();
+    coordinateMatrixExample();
+    blockMatrixExample();
+
+    sc.stop();
+  }
+}
diff --git a/examples/src/main/python/mllib/datatypes_examples.py b/examples/src/main/python/mllib/datatypes_examples.py
new file mode 100644
index 0000000000000..bf12c273602ae
--- /dev/null
+++ b/examples/src/main/python/mllib/datatypes_examples.py
@@ -0,0 +1,205 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+
+
+def __local_vector_example():
+    # $example on:local_vector$
+    import numpy as np
+    import scipy.sparse as sps
+    from pyspark.mllib.linalg import Vectors
+
+    # Use a NumPy array as a dense vector.
+    dv1 = np.array([1.0, 0.0, 3.0])
+    # Use a Python list as a dense vector.
+    dv2 = [1.0, 0.0, 3.0]
+    # Create a SparseVector.
+    sv1 = Vectors.sparse(3, [0, 2], [1.0, 3.0])
+    # Use a single-column SciPy csc_matrix as a sparse vector.
+    sv2 = sps.csc_matrix((np.array([1.0, 3.0]), np.array([0, 2]), np.array([0, 2])), shape=(3, 1))
+    # $example off:local_vector$
+
+
+def __labeled_point_example():
+    # $example on:labeled_point$
+    from pyspark.mllib.linalg import SparseVector
+    from pyspark.mllib.regression import LabeledPoint
+
+    # Create a labeled point with a positive label and a dense feature vector.
+    pos = LabeledPoint(1.0, [1.0, 0.0, 3.0])
+
+    # Create a labeled point with a negative label and a sparse feature vector.
+    neg = LabeledPoint(0.0, SparseVector(3, [0, 2], [1.0, 3.0]))
+    # $example off:labeled_point$
+
+
+def __libsvm_example():
+    sc = SparkContext.getOrCreate()
+
+    # $example on:libsvm$
+    from pyspark.mllib.util import MLUtils
+
+    examples = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    # $example off:libsvm$
+
+
+def __local_matrix_example():
+    # $example on:local_matrix$
+    from pyspark.mllib.linalg import Matrix, Matrices
+
+    # Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
+    dm2 = Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])
+
+    # Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
+    sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 2, 1], [9, 6, 8])
+    # $example off:local_matrix$
+
+
+def __row_matrix_example():
+    sc = SparkContext.getOrCreate()
+
+    # $example on:row_matrix$
+    from pyspark.mllib.linalg.distributed import RowMatrix
+
+    # Create an RDD of vectors.
+    rows = sc.parallelize([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], 1)
+
+    # Create a RowMatrix from an RDD of vectors.
+    mat = RowMatrix(rows)
+
+    # Get its size.
+    m = mat.numRows()  # 4
+    n = mat.numCols()  # 3
+
+    # QR decomposition
+    qrResult = mat.tallSkinnyQR(True)
+    # $example off:row_matrix$
+
+
+def __indexed_row_matrix_example():
+    sc = SparkContext.getOrCreate()
+
+    # $example on:indexed_row_matrix$
+    from pyspark.mllib.linalg.distributed import IndexedRow, IndexedRowMatrix
+
+    # Create an RDD of indexed rows.
+    # - This can be done explicitly with the IndexedRow class:
+    indexedRows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+                                  IndexedRow(1, [4, 5, 6]),
+                                  IndexedRow(2, [7, 8, 9]),
+                                  IndexedRow(3, [10, 11, 12])])
+    # - or by using (long, vector) tuples:
+    indexedRows = sc.parallelize([(0, [1, 2, 3]), (1, [4, 5, 6]),
+                                  (2, [7, 8, 9]), (3, [10, 11, 12])])
+
+    # Create an IndexedRowMatrix from an RDD of IndexedRows.
+    mat = IndexedRowMatrix(indexedRows)
+
+    # Get its size.
+    m = mat.numRows()  # 4
+    n = mat.numCols()  # 3
+
+    # Get the rows as an RDD of IndexedRows.
+    rowsRDD = mat.rows
+
+    # Convert to a RowMatrix by dropping the row indices.
+    rowMat = mat.toRowMatrix()
+    # $example off:indexed_row_matrix$
+
+
+def __coordinate_matrix_example():
+    sc = SparkContext.getOrCreate()
+
+    # $example on:coordinate_matrix$
+    from pyspark.mllib.linalg.distributed import CoordinateMatrix, MatrixEntry
+
+    # Create an RDD of coordinate entries.
+    # - This can be done explicitly with the MatrixEntry class:
+    entries = \
+        sc.parallelize([MatrixEntry(0, 0, 1.2), MatrixEntry(1, 0, 2.1), MatrixEntry(6, 1, 3.7)])
+    # - or using (long, long, float) tuples:
+    entries = sc.parallelize([(0, 0, 1.2), (1, 0, 2.1), (2, 1, 3.7)])
+
+    # Create a CoordinateMatrix from an RDD of MatrixEntries.
+    mat = CoordinateMatrix(entries)
+
+    # Get its size.
+    m = mat.numRows()  # 3
+    n = mat.numCols()  # 2
+
+    # Get the entries as an RDD of MatrixEntries.
+    entriesRDD = mat.entries
+
+    # Convert to a RowMatrix.
+    rowMat = mat.toRowMatrix()
+
+    # Convert to an IndexedRowMatrix.
+    indexedRowMat = mat.toIndexedRowMatrix()
+
+    # Convert to a BlockMatrix.
+    blockMat = mat.toBlockMatrix()
+    # $example off:coordinate_matrix$
+
+
+def __block_matrix_example():
+    sc = SparkContext.getOrCreate()
+
+    # $example on:block_matrix$
+    from pyspark.mllib.linalg import Matrices
+    from pyspark.mllib.linalg.distributed import BlockMatrix
+
+    # Create an RDD of sub-matrix blocks.
+    blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+                             ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+
+    # Create a BlockMatrix from an RDD of sub-matrix blocks.
+    mat = BlockMatrix(blocks, 3, 2)
+
+    # Get its size.
+    m = mat.numRows()  # 6
+    n = mat.numCols()  # 2
+
+    # Get the blocks as an RDD of sub-matrix blocks.
+    blocksRDD = mat.blocks
+
+    # Convert to a LocalMatrix.
+    localMat = mat.toLocalMatrix()
+
+    # Convert to an IndexedRowMatrix.
+    indexedRowMat = mat.toIndexedRowMatrix()
+
+    # Convert to a CoordinateMatrix.
+    coordinateMat = mat.toCoordinateMatrix()
+    # $example off:block_matrix$
+
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonDataTypesExamples")
+
+    __local_vector_example()
+    __labeled_point_example()
+    __libsvm_example()
+    __local_matrix_example()
+    __row_matrix_example()
+    __indexed_row_matrix_example()
+    __coordinate_matrix_example()
+    __block_matrix_example()
+
+    sc.stop()
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DataTypesExamples.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DataTypesExamples.scala
new file mode 100644
index 0000000000000..28c41b8d64988
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DataTypesExamples.scala
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+
+
+object DataTypesExamples {
+
+  private def localVectorExample(): Unit = {
+    // $example on:local_vector$
+    import org.apache.spark.mllib.linalg.{Vector, Vectors}
+
+    // Create a dense vector (1.0, 0.0, 3.0).
+    val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
+    // Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to
+    // nonzero entries.
+    val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
+    // Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries.
+    val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0)))
+    // $example off:local_vector$
+  }
+
+  private def labeledPointExample(): Unit = {
+    // $example on:labeled_point$
+    import org.apache.spark.mllib.linalg.Vectors
+    import org.apache.spark.mllib.regression.LabeledPoint
+
+    // Create a labeled point with a positive label and a dense feature vector.
+    val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))
+
+    // Create a labeled point with a negative label and a sparse feature vector.
+    val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)))
+    // $example off:labeled_point$
+  }
+
+  private def libsvmExample(): Unit = {
+    val sc = SparkContext.getOrCreate()
+    // $example on:libsvm$
+    import org.apache.spark.mllib.regression.LabeledPoint
+    import org.apache.spark.mllib.util.MLUtils
+    import org.apache.spark.rdd.RDD
+
+    val examples: RDD[LabeledPoint] =
+      MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    // $example off:libsvm$
+  }
+
+  private def localMatrixExample(): Unit = {
+    // $example on:local_matrix$
+    import org.apache.spark.mllib.linalg.{Matrix, Matrices}
+
+    // Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
+    val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
+
+    // Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
+    val sm: Matrix = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 2, 1), Array(9, 6, 8))
+    // $example off:local_matrix$
+  }
+
+  private def rowMatrixExample(): Unit = {
+    val sc = SparkContext.getOrCreate()
+    // $example on:row_matrix$
+    import org.apache.spark.mllib.linalg.{Vector, Vectors}
+    import org.apache.spark.mllib.linalg.distributed.RowMatrix
+    import org.apache.spark.rdd.RDD
+
+    val v1 = Vectors.dense(1.0, 10.0, 100.0)
+    val v2 = Vectors.dense(2.0, 20.0, 200.0)
+    val v3 = Vectors.dense(3.0, 30.0, 300.0)
+
+    val rows: RDD[Vector] = sc.parallelize(Seq(v1, v2, v3)) // an RDD of local vectors
+    // Create a RowMatrix from an RDD[Vector].
+    val mat: RowMatrix = new RowMatrix(rows)
+
+    // Get its size.
+    val m = mat.numRows()
+    val n = mat.numCols()
+
+    // QR decomposition
+    val qrResult = mat.tallSkinnyQR(true)
+    // $example off:row_matrix$
+  }
+
+  private def indexedRowMatrixExample(): Unit = {
+    val sc = SparkContext.getOrCreate()
+
+    // $example on:indexed_row_matrix$
+    import org.apache.spark.mllib.linalg.Vectors
+    import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix}
+    import org.apache.spark.rdd.RDD
+
+    val r0 = IndexedRow(0, Vectors.dense(1, 2, 3))
+    val r1 = IndexedRow(1, Vectors.dense(4, 5, 6))
+    val r2 = IndexedRow(2, Vectors.dense(7, 8, 9))
+    val r3 = IndexedRow(3, Vectors.dense(10, 11, 12))
+
+    val rows: RDD[IndexedRow] = sc.parallelize(Seq(r0, r1, r2, r3)) // an RDD of indexed rows
+    // Create an IndexedRowMatrix from an RDD[IndexedRow].
+    val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)
+
+    // Get its size.
+    val m = mat.numRows()
+    val n = mat.numCols()
+
+    // Drop its row indices.
+    val rowMat: RowMatrix = mat.toRowMatrix()
+    // $example off:indexed_row_matrix$
+  }
+
+  private def coordinateMatrixExample(): Unit = {
+    val sc = SparkContext.getOrCreate()
+
+    // $example on:coordinate_matrix$
+    import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
+    import org.apache.spark.rdd.RDD
+
+    val me1 = MatrixEntry(0, 0, 1.2)
+    val me2 = MatrixEntry(1, 0, 2.1)
+    val me3 = MatrixEntry(6, 1, 3.7)
+
+    val entries: RDD[MatrixEntry] = sc.parallelize(Seq(me1, me2, me3)) // an RDD of matrix entries
+    // Create a CoordinateMatrix from an RDD[MatrixEntry].
+    val mat: CoordinateMatrix = new CoordinateMatrix(entries)
+
+    // Get its size.
+    val m = mat.numRows()
+    val n = mat.numCols()
+
+    // Convert it to an IndexedRowMatrix whose rows are sparse vectors.
+ val indexedRowMatrix = mat.toIndexedRowMatrix() + // $example off:coordinate_matrix$ + } + + private def blockMatrixExample(): Unit = { + val sc = SparkContext.getOrCreate() + + // $example on:block_matrix$ + import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry} + import org.apache.spark.rdd.RDD + + val me1 = MatrixEntry(0, 0, 1.2) + val me2 = MatrixEntry(1, 0, 2.1) + val me3 = MatrixEntry(6, 1, 3.7) + + // an RDD of (i, j, v) matrix entries + val entries: RDD[MatrixEntry] = sc.parallelize(Seq(me1, me2, me3)) + // Create a CoordinateMatrix from an RDD[MatrixEntry]. + val coordMat: CoordinateMatrix = new CoordinateMatrix(entries) + // Transform the CoordinateMatrix to a BlockMatrix + val matA: BlockMatrix = coordMat.toBlockMatrix().cache() + + // Validate whether the BlockMatrix is set up properly. + // Throws an Exception when it is not valid. + // Nothing happens if it is valid. + matA.validate() + + // Calculate A^T A. + val ata = matA.transpose.multiply(matA) + // $example off:block_matrix$ + } + + def main(args: Array[String]): Unit = { + val conf = new SparkConf().setAppName("DataTypesExamples") + val sc = new SparkContext(conf) + + localVectorExample() + labeledPointExample() + libsvmExample() + localMatrixExample() + rowMatrixExample() + indexedRowMatrixExample() + coordinateMatrixExample() + blockMatrixExample() + + sc.stop() + } +} +// scalastyle:on println
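With the standard examples layout, the new files should be runnable end to end as a smoke test: `bin/run-example mllib.DataTypesExamples` for Scala, `bin/run-example mllib.JavaDataTypesExamples` for Java, and `bin/spark-submit examples/src/main/python/mllib/datatypes_examples.py` for Python (commands assume a built Spark checkout).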