From ba42cf345056903c224d3fdcf769d1ed5eef516c Mon Sep 17 00:00:00 2001 From: lewuathe Date: Wed, 8 Jul 2015 22:58:35 +0900 Subject: [PATCH 1/2] [SPARK-6266] PySpark SparseVector missing doc for size, indices, values --- python/pyspark/mllib/linalg.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index 12d8dbbb92c56..b683783dcbe3b 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -440,7 +440,7 @@ def __init__(self, size, *args): values (sorted by index). :param size: Size of the vector. - :param args: Non-zero entries, as a dictionary, list of tupes, + :param args: Non-zero entries, as a dictionary, list of tuples, or two sorted lists containing indices and values. >>> SparseVector(4, {1: 1.0, 3: 5.5}) @@ -451,6 +451,7 @@ def __init__(self, size, *args): SparseVector(4, {1: 1.0, 3: 5.5}) """ self.size = int(size) + """ Size of the vector. """ assert 1 <= len(args) <= 2, "must pass either 2 or 3 arguments" if len(args) == 1: pairs = args[0] @@ -458,7 +459,9 @@ def __init__(self, size, *args): pairs = pairs.items() pairs = sorted(pairs) self.indices = np.array([p[0] for p in pairs], dtype=np.int32) + """ The list of index corresponding to non-zero entries. """ self.values = np.array([p[1] for p in pairs], dtype=np.float64) + """ The list of non-zero entries. """ else: if isinstance(args[0], bytes): assert isinstance(args[1], bytes), "values should be string too" From 51d98958047dc86725a0b9abce11378559046aec Mon Sep 17 00:00:00 2001 From: lewuathe Date: Thu, 9 Jul 2015 21:28:51 +0900 Subject: [PATCH 2/2] Update docs --- python/pyspark/mllib/linalg.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index 568a4bebecd83..040886f71775b 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -445,8 +445,10 @@ def __init__(self, size, *args): values (sorted by index). :param size: Size of the vector. - :param args: Non-zero entries, as a dictionary, list of tuples, - or two sorted lists containing indices and values. + :param args: Active entries, as a dictionary {index: value, ...}, + a list of tuples [(index, value), ...], or a list of strictly i + ncreasing indices and a list of corresponding values [index, ...], + [value, ...]. Inactive entries are treated as zeros. >>> SparseVector(4, {1: 1.0, 3: 5.5}) SparseVector(4, {1: 1.0, 3: 5.5}) @@ -464,9 +466,9 @@ def __init__(self, size, *args): pairs = pairs.items() pairs = sorted(pairs) self.indices = np.array([p[0] for p in pairs], dtype=np.int32) - """ The list of index corresponding to non-zero entries. """ + """ A list of indices corresponding to active entries. """ self.values = np.array([p[1] for p in pairs], dtype=np.float64) - """ The list of non-zero entries. """ + """ A list of values corresponding to active entries. """ else: if isinstance(args[0], bytes): assert isinstance(args[1], bytes), "values should be string too"