Skip to content
Permalink
Browse files

[SPARK-25124][ML] VectorSizeHint setSize and getSize don't return values backport to 2.3

## What changes were proposed in this pull request?
In feature.py, `VectorSizeHint.setSize` and `VectorSizeHint.getSize` do not return a value. This change adds the missing `return` statements.

(Please fill in changes proposed in this fix)

## How was this patch tested?

A unit test was added (`test_vector_size_hint` in python/pyspark/ml/tests.py).

Closes #22228 from huaxingao/spark-25124-2.3.

Authored-by: Huaxin Gao <huaxing@us.ibm.com>
Signed-off-by: Joseph K. Bradley <joseph@databricks.com>
  • Loading branch information...
huaxingao authored and jkbradley committed Aug 24, 2018
1 parent 42c1fdd commit f5983823e9b4a3b4762481306ea071a73f5742fc
Showing with 19 additions and 2 deletions.
  1. +2 −2 python/pyspark/ml/feature.py
  2. +17 −0 python/pyspark/ml/tests.py
@@ -3673,12 +3673,12 @@ def setParams(self, inputCol=None, size=None, handleInvalid="error"):
@since("2.3.0")
def getSize(self):
    """
    Gets size param, the size of vectors in `inputCol`.

    :return: the value produced by ``self.getOrDefault(self.size)``.
    """
    # Look the param up exactly once and return it; a bare lookup whose
    # result is not returned would simply be thrown away.
    return self.getOrDefault(self.size)

@since("2.3.0")
def setSize(self, value):
    """
    Sets size param, the size of vectors in `inputCol`.

    :param value: new value for the size param.
    :return: whatever ``self._set`` returns (presumably this instance, so
             that setter calls can be chained -- confirm against ``_set``).
    """
    # Apply the param exactly once; repeating the identical ``_set`` call
    # would only be redundant work.
    return self._set(size=value)


if __name__ == "__main__":
@@ -678,6 +678,23 @@ def test_string_indexer_handle_invalid(self):
expected2 = [Row(id=0, indexed=0.0), Row(id=1, indexed=1.0)]
self.assertEqual(actual2, expected2)

def test_vector_size_hint(self):
    # Three rows whose vectors have lengths 3, 4 and 2 respectively.
    rows = [
        (0, Vectors.dense([0.0, 10.0, 0.5])),
        (1, Vectors.dense([1.0, 11.0, 0.5, 0.6])),
        (2, Vectors.dense([2.0, 12.0])),
    ]
    frame = self.spark.createDataFrame(rows, ["id", "vector"])

    # The size is supplied through the setter rather than the constructor,
    # then read back through the getter.
    hint = VectorSizeHint(inputCol="vector", handleInvalid="skip")
    hint.setSize(3)
    self.assertEqual(hint.getSize(), 3)

    # The first row of the transformed frame must carry the 3-element vector.
    first_row = hint.transform(frame).head()
    self.assertEqual(first_row.vector, DenseVector([0.0, 10.0, 0.5]))


class HasInducedError(Params):

0 comments on commit f598382

Please sign in to comment.
You can’t perform that action at this time.