From 316acac64b31894f0e15caa6d612d57aa17402e4 Mon Sep 17 00:00:00 2001 From: Bhargav Mangipudi Date: Sun, 11 Oct 2015 18:10:21 -0500 Subject: [PATCH 1/2] [SPARK-11050][MLLIB] PySpark SparseVector can return wrong index in error message For negative indices in the SparseVector, we normalize the index by adding the vector size before the bounds check. If the index is still out of bounds at this point, the error message reports the *normalized* index instead of the original one passed by the caller. This change makes the error message report the original index. --- python/pyspark/mllib/linalg/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index d903b9030d8ce..a785baa0f0dde 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -764,10 +764,12 @@ def __getitem__(self, index): if not isinstance(index, int): raise TypeError( "Indices must be of type integer, got type %s" % type(index)) + + original_index = index if index < 0: index += self.size if index >= self.size or index < 0: - raise ValueError("Index %d out of bounds." % index) + raise ValueError("Index %d out of bounds." % original_index) insert_index = np.searchsorted(inds, index) if insert_index >= inds.size: From 19bc764bf62837764dc9b97f5b28c5d9535570d2 Mon Sep 17 00:00:00 2001 From: Bhargav Mangipudi Date: Mon, 12 Oct 2015 13:39:09 -0500 Subject: [PATCH 2/2] [SPARK-11050][MLLib] Updated per PR comment. 
--- python/pyspark/mllib/linalg/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index a785baa0f0dde..5276eb41cf29e 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -765,11 +765,10 @@ def __getitem__(self, index): raise TypeError( "Indices must be of type integer, got type %s" % type(index)) - original_index = index + if index >= self.size or index < -self.size: + raise ValueError("Index %d out of bounds." % index) if index < 0: index += self.size - if index >= self.size or index < 0: - raise ValueError("Index %d out of bounds." % original_index) insert_index = np.searchsorted(inds, index) if insert_index >= inds.size: