From b29e2bc98cc1fa48881884207c5907de09ddc979 Mon Sep 17 00:00:00 2001
From: lewuathe
Date: Mon, 22 Jun 2015 21:01:54 +0900
Subject: [PATCH] Remove scipy dependencies

---
 python/pyspark/mllib/util.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py
index 9d9bb9c14deea..d7ab4edc0fcfc 100644
--- a/python/pyspark/mllib/util.py
+++ b/python/pyspark/mllib/util.py
@@ -18,18 +18,12 @@
 import sys
 import numpy as np
 import warnings
-try:
-    import scipy.sparse
-    _have_scipy = True
-except:
-    # No SciPy in environment, but that's okay
-    _have_scipy = False
 
 if sys.version > '3':
     xrange = range
 
 from pyspark.mllib.common import callMLlibFunc, inherit_doc
-from pyspark.mllib.linalg import Vector, Vectors, SparseVector, _convert_to_vector
+from pyspark.mllib.linalg import Vectors, SparseVector, _convert_to_vector
 
 
 class MLUtils(object):
@@ -183,11 +177,11 @@ def appendBias(data):
         """
         vec = _convert_to_vector(data)
         if isinstance(vec, SparseVector):
-            l = scipy.sparse.csc_matrix(np.append(vec.toArray(), 1.0))
-            return _convert_to_vector(l.T)
-        elif isinstance(vec, Vector):
-            vec = vec.toArray()
-        return _convert_to_vector(np.append(vec, 1.0).tolist())
+            entries = dict(zip(vec.indices, vec.values))
+            entries[len(vec)] = 1.0
+            return SparseVector(len(vec) + 1, entries)
+        else:
+            return _convert_to_vector(np.append(vec.toArray(), 1.0))
 
     @staticmethod
     def loadVectors(sc, path):