Skip to content

Commit

Permalink
Check existence of scipy.sparse
Browse files Browse the repository at this point in the history
  • Loading branch information
Lewuathe committed Jun 18, 2015
1 parent 5d555b1 commit d6bd416
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 6 deletions.
8 changes: 4 additions & 4 deletions python/pyspark/mllib/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,11 +835,11 @@ def test_append_bias_with_vector(self):

def test_append_bias_with_sp_vector(self):
# Checks MLUtils.appendBias on a sparse input vector of size 3.
# NOTE(review): this hunk is rendered without +/- markers, so pre-commit and
# post-commit lines appear interleaved; the page header reports 4 additions
# and 4 deletions for this file. Which lines belong to which side is inferred
# below and should be confirmed against the actual commit.
data = Vectors.sparse(3, {0: 2.0, 2: 2.0})
# Returned value must be scipy.sparse matrix
# (presumably the pre-commit expectation, matching the csc_matrix checks below)
expected = Vectors.sparse(4, {0: 2.0, 2: 2.0, 3: 1.0})
# Returned value must be SparseVector
# (presumably the post-commit expectation: input entries kept, bias 1.0 at index 3)
ret = MLUtils.appendBias(data)
# presumably pre-commit assertions: they treat ret as a 1x4 scipy csc_matrix
self.assertEqual(ret.shape, (1, 4))
self.assertEqual(ret.toarray()[0][3], 1.0)
self.assertEqual(type(ret), sp.csc_matrix)
# presumably post-commit assertions: ret is compared against a SparseVector
self.assertEqual(ret, expected)
self.assertEqual(type(ret), SparseVector)

def test_load_vectors(self):
import shutil
Expand Down
14 changes: 12 additions & 2 deletions python/pyspark/mllib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@

import sys
import numpy as np
import scipy.sparse as sp
import warnings
try:
import scipy.sparse
_have_scipy = True
except:
# No SciPy in environment, but that's okay
_have_scipy = False

if sys.version > '3':
xrange = range
Expand Down Expand Up @@ -178,7 +183,12 @@ def appendBias(data):
"""
vec = _convert_to_vector(data)
if isinstance(vec, SparseVector):
return sp.csc_matrix(np.append(vec.toArray(), 1.0))
if _have_scipy:
l = scipy.sparse.csc_matrix(np.append(vec.toArray(), 1.0))
return _convert_to_vector(l.T)
else:
raise TypeError("Cannot append bias %s into sparce "
"vector because of lack of scipy" % type(vec))
elif isinstance(vec, Vector):
vec = vec.toArray()
return np.append(vec, 1.0).tolist()
Expand Down

0 comments on commit d6bd416

Please sign in to comment.