From a4d256b37b0e4c8acdc9d9bca43588cd90cf1bcd Mon Sep 17 00:00:00 2001 From: Vinod K C Date: Wed, 2 Sep 2015 12:31:32 +0530 Subject: [PATCH 1/4] Added hashcode method --- .../scala/org/apache/spark/ml/tree/Split.scala | 9 +++++++++ .../org/apache/spark/mllib/linalg/Matrices.scala | 14 ++++++++++++-- .../spark/ml/tree/impl/RandomForestSuite.scala | 15 ++++++++++++++- .../apache/spark/mllib/linalg/MatricesSuite.scala | 6 +++++- 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala index 78199cc2df582..8390aea4109a3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala @@ -120,6 +120,13 @@ final class CategoricalSplit private[ml] ( } } + override def hashCode(): Int = { + var hashCode = categories.hashCode + hashCode = hashCode * 31 + featureIndex + hashCode = hashCode * 31 + isLeft.hashCode() + hashCode + } + override private[tree] def toOld: OldSplit = { val oldCats = if (isLeft) { categories @@ -181,6 +188,8 @@ final class ContinuousSplit private[ml] (override val featureIndex: Int, val thr } } + override def hashCode(): Int = 31 * (31 + featureIndex) + threshold.hashCode + override private[tree] def toOld: OldSplit = { OldSplit(featureIndex, threshold, OldFeatureType.Continuous, List.empty[Double]) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index c02ba426fcc3a..114952a6f970c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -27,7 +27,6 @@ import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.sql.catalyst.expressions.GenericMutableRow import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types._ - /** * Trait for a local matrix. */ @@ -278,7 +277,8 @@ class DenseMatrix @Since("1.3.0") ( } override def hashCode: Int = { - com.google.common.base.Objects.hashCode(numRows : Integer, numCols: Integer, toArray) + val state = Seq(numRows, numCols, java.util.Arrays.hashCode(values), isTransposed.hashCode) + state.reduce((a, b) => 31 * a + b) } private[mllib] def toBreeze: BM[Double] = { @@ -554,6 +554,16 @@ class SparseMatrix @Since("1.3.0") ( case _ => false } + override def hashCode(): Int = { + val state = Seq( + numRows, + numCols, + java.util.Arrays.hashCode(colPtrs), + java.util.Arrays.hashCode(rowIndices), + java.util.Arrays.hashCode(values)) + state.reduce((a, b) => 31 * a + b) + } + private[mllib] def toBreeze: BM[Double] = { if (!isTransposed) { new BSM[Double](values, numRows, numCols, colPtrs, rowIndices) diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala index dc852795c7f62..29aa0626dfc9b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.tree.impl import org.apache.spark.SparkFunSuite import org.apache.spark.ml.classification.DecisionTreeClassificationModel import org.apache.spark.ml.impl.TreeTests -import org.apache.spark.ml.tree.{ContinuousSplit, DecisionTreeModel, LeafNode, Node} +import org.apache.spark.ml.tree._ import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.tree.impurity.GiniCalculator import org.apache.spark.mllib.util.MLlibTestSparkContext @@ -95,6 +95,19 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext { assert(mapToVec(map.toMap) ~== mapToVec(expected) relTol 0.01) } + test("equals and hashCode ContinuousSplit") { + val split1 = new ContinuousSplit(1, 1.0) + val split2 = new ContinuousSplit(1, 1.0) + assert(split1 == split2) + assert(split1.hashCode === split2.hashCode) + } + + test("equals and hashCode CategoricalSplit") { + val split1 = new CategoricalSplit(100, Array(0, 2.0), 5) + val split2 = new CategoricalSplit(100, Array(0, 2.0), 5) + assert(split1 == split2) + assert(split1.hashCode === split2.hashCode) + } } private object RandomForestSuite { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index bfd6d5495f5e0..6240a62fc27bb 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -74,17 +74,21 @@ class MatricesSuite extends SparkFunSuite { } } - test("equals") { + test("equals and hashCode") { + val dm = Matrices.dense(2, 2, Array(0.0, 1.0, 2.0, 3.0)) val dm1 = Matrices.dense(2, 2, Array(0.0, 1.0, 2.0, 3.0)) assert(dm1 === dm1) + assert(dm1.hashCode === dm.hashCode) assert(dm1 !== dm1.transpose) + val dm2 = Matrices.dense(2, 2, Array(0.0, 2.0, 1.0, 3.0)) assert(dm1 === dm2.transpose) val sm1 = dm1.asInstanceOf[DenseMatrix].toSparse assert(sm1 === sm1) assert(sm1 === dm1) + assert(sm1.hashCode === dm.asInstanceOf[DenseMatrix].toSparse.hashCode) assert(sm1 !== sm1.transpose) val sm2 = dm2.asInstanceOf[DenseMatrix].toSparse From 7878c42d568d4c08a64717e66176b0ecf7ee27ab Mon Sep 17 00:00:00 2001 From: Vinod K C Date: Wed, 2 Sep 2015 12:39:31 +0530 Subject: [PATCH 2/4] Removed blank lines --- mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala | 2 +- .../src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 1 + .../scala/org/apache/spark/mllib/linalg/MatricesSuite.scala | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala index 8390aea4109a3..2ad8c8457039a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala @@ -123,7 +123,7 @@ final class CategoricalSplit private[ml] ( override def hashCode(): Int = { var hashCode = categories.hashCode hashCode = hashCode * 31 + featureIndex - hashCode = hashCode * 31 + isLeft.hashCode() + hashCode = hashCode * 31 + isLeft.hashCode hashCode } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 114952a6f970c..d33974be5afe4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -27,6 +27,7 @@ import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.sql.catalyst.expressions.GenericMutableRow import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types._ + /** * Trait for a local matrix. */ diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 6240a62fc27bb..c88b2d66caf18 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -81,7 +81,6 @@ class MatricesSuite extends SparkFunSuite { assert(dm1.hashCode === dm.hashCode) assert(dm1 !== dm1.transpose) - val dm2 = Matrices.dense(2, 2, Array(0.0, 2.0, 1.0, 3.0)) assert(dm1 === dm2.transpose) From 28737427ff16c2e4d84540361968c9edef3a79dc Mon Sep 17 00:00:00 2001 From: Vinod K C Date: Wed, 2 Sep 2015 14:52:58 +0530 Subject: [PATCH 3/4] Updated testcase --- .../scala/org/apache/spark/mllib/linalg/MatricesSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index c88b2d66caf18..7c9778782cdb8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -77,8 +77,9 @@ class MatricesSuite extends SparkFunSuite { test("equals and hashCode") { val dm = Matrices.dense(2, 2, Array(0.0, 1.0, 2.0, 3.0)) val dm1 = Matrices.dense(2, 2, Array(0.0, 1.0, 2.0, 3.0)) - assert(dm1 === dm1) + assert(dm1 === dm) assert(dm1.hashCode === dm.hashCode) + assert(dm1 === dm1) assert(dm1 !== dm1.transpose) val dm2 = Matrices.dense(2, 2, Array(0.0, 2.0, 1.0, 3.0)) From 828eecf134fcc845b9eb815eeed33d95dbe7df64 Mon Sep 17 00:00:00 2001 From: Vinod K C Date: Wed, 2 Sep 2015 15:39:02 +0530 Subject: [PATCH 4/4] removed package names --- .../scala/org/apache/spark/mllib/linalg/Matrices.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index d33974be5afe4..2f8085e0fb29c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -278,7 +278,7 @@ class DenseMatrix @Since("1.3.0") ( } override def hashCode: Int = { - val state = Seq(numRows, numCols, java.util.Arrays.hashCode(values), isTransposed.hashCode) + val state = Seq(numRows, numCols, Arrays.hashCode(values), isTransposed.hashCode) state.reduce((a, b) => 31 * a + b) } @@ -559,9 +559,9 @@ class SparseMatrix @Since("1.3.0") ( val state = Seq( numRows, numCols, - java.util.Arrays.hashCode(colPtrs), - java.util.Arrays.hashCode(rowIndices), - java.util.Arrays.hashCode(values)) + Arrays.hashCode(colPtrs), + Arrays.hashCode(rowIndices), + Arrays.hashCode(values)) state.reduce((a, b) => 31 * a + b) }